1 /* 2 * Cavium ThunderX memory controller kernel module 3 * 4 * This file is subject to the terms and conditions of the GNU General Public 5 * License. See the file "COPYING" in the main directory of this archive 6 * for more details. 7 * 8 * Copyright Cavium, Inc. (C) 2015-2017. All rights reserved. 9 * 10 */ 11 12 #include <linux/module.h> 13 #include <linux/pci.h> 14 #include <linux/edac.h> 15 #include <linux/interrupt.h> 16 #include <linux/string.h> 17 #include <linux/stop_machine.h> 18 #include <linux/delay.h> 19 #include <linux/sizes.h> 20 #include <linux/atomic.h> 21 #include <linux/bitfield.h> 22 #include <linux/circ_buf.h> 23 24 #include <asm/page.h> 25 26 #include "edac_module.h" 27 28 #define phys_to_pfn(phys) (PFN_DOWN(phys)) 29 30 #define THUNDERX_NODE GENMASK(45, 44) 31 32 enum { 33 ERR_CORRECTED = 1, 34 ERR_UNCORRECTED = 2, 35 ERR_UNKNOWN = 3, 36 }; 37 38 #define MAX_SYNDROME_REGS 4 39 40 struct error_syndrome { 41 u64 reg[MAX_SYNDROME_REGS]; 42 }; 43 44 struct error_descr { 45 int type; 46 u64 mask; 47 char *descr; 48 }; 49 50 static void decode_register(char *str, size_t size, 51 const struct error_descr *descr, 52 const uint64_t reg) 53 { 54 int ret = 0; 55 56 while (descr->type && descr->mask && descr->descr) { 57 if (reg & descr->mask) { 58 ret = snprintf(str, size, "\n\t%s, %s", 59 descr->type == ERR_CORRECTED ? 60 "Corrected" : "Uncorrected", 61 descr->descr); 62 str += ret; 63 size -= ret; 64 } 65 descr++; 66 } 67 } 68 69 static unsigned long get_bits(unsigned long data, int pos, int width) 70 { 71 return (data >> pos) & ((1 << width) - 1); 72 } 73 74 #define L2C_CTL 0x87E080800000 75 #define L2C_CTL_DISIDXALIAS BIT(0) 76 77 #define PCI_DEVICE_ID_THUNDER_LMC 0xa022 78 79 #define LMC_FADR 0x20 80 #define LMC_FADR_FDIMM(x) ((x >> 37) & 0x1) 81 #define LMC_FADR_FBUNK(x) ((x >> 36) & 0x1) 82 #define LMC_FADR_FBANK(x) ((x >> 32) & 0xf) 83 #define LMC_FADR_FROW(x) ((x >> 14) & 0xffff) 84 #define LMC_FADR_FCOL(x) ((x >> 0) & 0x1fff) 85 86 #define LMC_NXM_FADR 0x28 87 #define LMC_ECC_SYND 0x38 88 89 #define LMC_ECC_PARITY_TEST 0x108 90 91 #define LMC_INT_W1S 0x150 92 93 #define LMC_INT_ENA_W1C 0x158 94 #define LMC_INT_ENA_W1S 0x160 95 96 #define LMC_CONFIG 0x188 97 98 #define LMC_CONFIG_BG2 BIT(62) 99 #define LMC_CONFIG_RANK_ENA BIT(42) 100 #define LMC_CONFIG_PBANK_LSB(x) (((x) >> 5) & 0xF) 101 #define LMC_CONFIG_ROW_LSB(x) (((x) >> 2) & 0x7) 102 103 #define LMC_CONTROL 0x190 104 #define LMC_CONTROL_XOR_BANK BIT(16) 105 106 #define LMC_INT 0x1F0 107 108 #define LMC_INT_DDR_ERR BIT(11) 109 #define LMC_INT_DED_ERR (0xFUL << 5) 110 #define LMC_INT_SEC_ERR (0xFUL << 1) 111 #define LMC_INT_NXM_WR_MASK BIT(0) 112 113 #define LMC_DDR_PLL_CTL 0x258 114 #define LMC_DDR_PLL_CTL_DDR4 BIT(29) 115 116 #define LMC_FADR_SCRAMBLED 0x330 117 118 #define LMC_INT_UE (LMC_INT_DDR_ERR | LMC_INT_DED_ERR | \ 119 LMC_INT_NXM_WR_MASK) 120 121 #define LMC_INT_CE (LMC_INT_SEC_ERR) 122 123 static const struct error_descr lmc_errors[] = { 124 { 125 .type = ERR_CORRECTED, 126 .mask = LMC_INT_SEC_ERR, 127 .descr = "Single-bit ECC error", 128 }, 129 { 130 .type = ERR_UNCORRECTED, 131 .mask = LMC_INT_DDR_ERR, 132 .descr = "DDR chip error", 133 }, 134 { 135 .type = ERR_UNCORRECTED, 136 .mask = LMC_INT_DED_ERR, 137 .descr = "Double-bit ECC error", 138 }, 139 { 140 .type = ERR_UNCORRECTED, 141 .mask = LMC_INT_NXM_WR_MASK, 142 .descr = "Non-existent memory write", 143 }, 144 {0, 0, NULL}, 145 }; 146 147 #define LMC_INT_EN_DDR_ERROR_ALERT_ENA BIT(5) 148 #define LMC_INT_EN_DLCRAM_DED_ERR BIT(4) 149 #define LMC_INT_EN_DLCRAM_SEC_ERR BIT(3) 150 #define LMC_INT_INTR_DED_ENA BIT(2) 151 #define LMC_INT_INTR_SEC_ENA BIT(1) 152 #define LMC_INT_INTR_NXM_WR_ENA BIT(0) 153 154 #define LMC_INT_ENA_ALL GENMASK(5, 0) 155 156 #define LMC_DDR_PLL_CTL 0x258 157 #define LMC_DDR_PLL_CTL_DDR4 BIT(29) 158 159 #define LMC_CONTROL 0x190 160 #define LMC_CONTROL_RDIMM BIT(0) 161 162 #define LMC_SCRAM_FADR 0x330 163 164 #define LMC_CHAR_MASK0 0x228 165 #define LMC_CHAR_MASK2 0x238 166 167 #define RING_ENTRIES 8 168 169 struct debugfs_entry { 170 const char *name; 171 umode_t mode; 172 const struct file_operations fops; 173 }; 174 175 struct lmc_err_ctx { 176 u64 reg_int; 177 u64 reg_fadr; 178 u64 reg_nxm_fadr; 179 u64 reg_scram_fadr; 180 u64 reg_ecc_synd; 181 }; 182 183 struct thunderx_lmc { 184 void __iomem *regs; 185 struct pci_dev *pdev; 186 struct msix_entry msix_ent; 187 188 atomic_t ecc_int; 189 190 u64 mask0; 191 u64 mask2; 192 u64 parity_test; 193 u64 node; 194 195 int xbits; 196 int bank_width; 197 int pbank_lsb; 198 int dimm_lsb; 199 int rank_lsb; 200 int bank_lsb; 201 int row_lsb; 202 int col_hi_lsb; 203 204 int xor_bank; 205 int l2c_alias; 206 207 struct page *mem; 208 209 struct lmc_err_ctx err_ctx[RING_ENTRIES]; 210 unsigned long ring_head; 211 unsigned long ring_tail; 212 }; 213 214 #define ring_pos(pos, size) ((pos) & (size - 1)) 215 216 #define DEBUGFS_STRUCT(_name, _mode, _write, _read) \ 217 static struct debugfs_entry debugfs_##_name = { \ 218 .name = __stringify(_name), \ 219 .mode = VERIFY_OCTAL_PERMISSIONS(_mode), \ 220 .fops = { \ 221 .open = simple_open, \ 222 .write = _write, \ 223 .read = _read, \ 224 .llseek = generic_file_llseek, \ 225 }, \ 226 } 227 228 #define DEBUGFS_FIELD_ATTR(_type, _field) \ 229 static ssize_t thunderx_##_type##_##_field##_read(struct file *file, \ 230 char __user *data, \ 231 size_t count, loff_t *ppos) \ 232 { \ 233 struct thunderx_##_type *pdata = file->private_data; \ 234 char buf[20]; \ 235 \ 236 snprintf(buf, count, "0x%016llx", pdata->_field); \ 237 return simple_read_from_buffer(data, count, ppos, \ 238 buf, sizeof(buf)); \ 239 } \ 240 \ 241 static ssize_t thunderx_##_type##_##_field##_write(struct file *file, \ 242 const char __user *data, \ 243 size_t count, loff_t *ppos) \ 244 { \ 245 struct thunderx_##_type *pdata = file->private_data; \ 246 int res; \ 247 \ 248 res = kstrtoull_from_user(data, count, 0, &pdata->_field); \ 249 \ 250 return res ? res : count; \ 251 } \ 252 \ 253 DEBUGFS_STRUCT(_field, 0600, \ 254 thunderx_##_type##_##_field##_write, \ 255 thunderx_##_type##_##_field##_read) \ 256 257 #define DEBUGFS_REG_ATTR(_type, _name, _reg) \ 258 static ssize_t thunderx_##_type##_##_name##_read(struct file *file, \ 259 char __user *data, \ 260 size_t count, loff_t *ppos) \ 261 { \ 262 struct thunderx_##_type *pdata = file->private_data; \ 263 char buf[20]; \ 264 \ 265 sprintf(buf, "0x%016llx", readq(pdata->regs + _reg)); \ 266 return simple_read_from_buffer(data, count, ppos, \ 267 buf, sizeof(buf)); \ 268 } \ 269 \ 270 static ssize_t thunderx_##_type##_##_name##_write(struct file *file, \ 271 const char __user *data, \ 272 size_t count, loff_t *ppos) \ 273 { \ 274 struct thunderx_##_type *pdata = file->private_data; \ 275 u64 val; \ 276 int res; \ 277 \ 278 res = kstrtoull_from_user(data, count, 0, &val); \ 279 \ 280 if (!res) { \ 281 writeq(val, pdata->regs + _reg); \ 282 res = count; \ 283 } \ 284 \ 285 return res; \ 286 } \ 287 \ 288 DEBUGFS_STRUCT(_name, 0600, \ 289 thunderx_##_type##_##_name##_write, \ 290 thunderx_##_type##_##_name##_read) 291 292 #define LMC_DEBUGFS_ENT(_field) DEBUGFS_FIELD_ATTR(lmc, _field) 293 294 /* 295 * To get an ECC error injected, the following steps are needed: 296 * - Setup the ECC injection by writing the appropriate parameters: 297 * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask0 298 * echo <bit mask value> > /sys/kernel/debug/<device number>/ecc_mask2 299 * echo 0x802 > /sys/kernel/debug/<device number>/ecc_parity_test 300 * - Do the actual injection: 301 * echo 1 > /sys/kernel/debug/<device number>/inject_ecc 302 */ 303 static ssize_t thunderx_lmc_inject_int_write(struct file *file, 304 const char __user *data, 305 size_t count, loff_t *ppos) 306 { 307 struct thunderx_lmc *lmc = file->private_data; 308 u64 val; 309 int res; 310 311 res = kstrtoull_from_user(data, count, 0, &val); 312 313 if (!res) { 314 /* Trigger the interrupt */ 315 writeq(val, lmc->regs + LMC_INT_W1S); 316 res = count; 317 } 318 319 return res; 320 } 321 322 static ssize_t thunderx_lmc_int_read(struct file *file, 323 char __user *data, 324 size_t count, loff_t *ppos) 325 { 326 struct thunderx_lmc *lmc = file->private_data; 327 char buf[20]; 328 u64 lmc_int = readq(lmc->regs + LMC_INT); 329 330 snprintf(buf, sizeof(buf), "0x%016llx", lmc_int); 331 return simple_read_from_buffer(data, count, ppos, buf, sizeof(buf)); 332 } 333 334 #define TEST_PATTERN 0xa5 335 336 static int inject_ecc_fn(void *arg) 337 { 338 struct thunderx_lmc *lmc = arg; 339 uintptr_t addr, phys; 340 unsigned int cline_size = cache_line_size(); 341 const unsigned int lines = PAGE_SIZE / cline_size; 342 unsigned int i, cl_idx; 343 344 addr = (uintptr_t)page_address(lmc->mem); 345 phys = (uintptr_t)page_to_phys(lmc->mem); 346 347 cl_idx = (phys & 0x7f) >> 4; 348 lmc->parity_test &= ~(7ULL << 8); 349 lmc->parity_test |= (cl_idx << 8); 350 351 writeq(lmc->mask0, lmc->regs + LMC_CHAR_MASK0); 352 writeq(lmc->mask2, lmc->regs + LMC_CHAR_MASK2); 353 writeq(lmc->parity_test, lmc->regs + LMC_ECC_PARITY_TEST); 354 355 readq(lmc->regs + LMC_CHAR_MASK0); 356 readq(lmc->regs + LMC_CHAR_MASK2); 357 readq(lmc->regs + LMC_ECC_PARITY_TEST); 358 359 for (i = 0; i < lines; i++) { 360 memset((void *)addr, TEST_PATTERN, cline_size); 361 barrier(); 362 363 /* 364 * Flush L1 cachelines to the PoC (L2). 365 * This will cause cacheline eviction to the L2. 366 */ 367 asm volatile("dc civac, %0\n" 368 "dsb sy\n" 369 : : "r"(addr + i * cline_size)); 370 } 371 372 for (i = 0; i < lines; i++) { 373 /* 374 * Flush L2 cachelines to the DRAM. 375 * This will cause cacheline eviction to the DRAM 376 * and ECC corruption according to the masks set. 377 */ 378 __asm__ volatile("sys #0,c11,C1,#2, %0\n" 379 : : "r"(phys + i * cline_size)); 380 } 381 382 for (i = 0; i < lines; i++) { 383 /* 384 * Invalidate L2 cachelines. 385 * The subsequent load will cause cacheline fetch 386 * from the DRAM and an error interrupt 387 */ 388 __asm__ volatile("sys #0,c11,C1,#1, %0" 389 : : "r"(phys + i * cline_size)); 390 } 391 392 for (i = 0; i < lines; i++) { 393 /* 394 * Invalidate L1 cachelines. 395 * The subsequent load will cause cacheline fetch 396 * from the L2 and/or DRAM 397 */ 398 asm volatile("dc ivac, %0\n" 399 "dsb sy\n" 400 : : "r"(addr + i * cline_size)); 401 } 402 403 return 0; 404 } 405 406 static ssize_t thunderx_lmc_inject_ecc_write(struct file *file, 407 const char __user *data, 408 size_t count, loff_t *ppos) 409 { 410 struct thunderx_lmc *lmc = file->private_data; 411 unsigned int cline_size = cache_line_size(); 412 u8 *tmp; 413 void __iomem *addr; 414 unsigned int offs, timeout = 100000; 415 416 atomic_set(&lmc->ecc_int, 0); 417 418 lmc->mem = alloc_pages_node(lmc->node, GFP_KERNEL, 0); 419 if (!lmc->mem) 420 return -ENOMEM; 421 422 tmp = kmalloc(cline_size, GFP_KERNEL); 423 if (!tmp) { 424 __free_pages(lmc->mem, 0); 425 return -ENOMEM; 426 } 427 428 addr = page_address(lmc->mem); 429 430 while (!atomic_read(&lmc->ecc_int) && timeout--) { 431 stop_machine(inject_ecc_fn, lmc, NULL); 432 433 for (offs = 0; offs < PAGE_SIZE; offs += cline_size) { 434 /* 435 * Do a load from the previously rigged location 436 * This should generate an error interrupt. 437 */ 438 memcpy(tmp, addr + offs, cline_size); 439 asm volatile("dsb ld\n"); 440 } 441 } 442 443 kfree(tmp); 444 __free_pages(lmc->mem, 0); 445 446 return count; 447 } 448 449 LMC_DEBUGFS_ENT(mask0); 450 LMC_DEBUGFS_ENT(mask2); 451 LMC_DEBUGFS_ENT(parity_test); 452 453 DEBUGFS_STRUCT(inject_int, 0200, thunderx_lmc_inject_int_write, NULL); 454 DEBUGFS_STRUCT(inject_ecc, 0200, thunderx_lmc_inject_ecc_write, NULL); 455 DEBUGFS_STRUCT(int_w1c, 0400, NULL, thunderx_lmc_int_read); 456 457 static struct debugfs_entry *lmc_dfs_ents[] = { 458 &debugfs_mask0, 459 &debugfs_mask2, 460 &debugfs_parity_test, 461 &debugfs_inject_ecc, 462 &debugfs_inject_int, 463 &debugfs_int_w1c, 464 }; 465 466 static int thunderx_create_debugfs_nodes(struct dentry *parent, 467 struct debugfs_entry *attrs[], 468 void *data, 469 size_t num) 470 { 471 int i; 472 struct dentry *ent; 473 474 if (!IS_ENABLED(CONFIG_EDAC_DEBUG)) 475 return 0; 476 477 if (!parent) 478 return -ENOENT; 479 480 for (i = 0; i < num; i++) { 481 ent = edac_debugfs_create_file(attrs[i]->name, attrs[i]->mode, 482 parent, data, &attrs[i]->fops); 483 484 if (IS_ERR(ent)) 485 break; 486 } 487 488 return i; 489 } 490 491 static phys_addr_t thunderx_faddr_to_phys(u64 faddr, struct thunderx_lmc *lmc) 492 { 493 phys_addr_t addr = 0; 494 int bank, xbits; 495 496 addr |= lmc->node << 40; 497 addr |= LMC_FADR_FDIMM(faddr) << lmc->dimm_lsb; 498 addr |= LMC_FADR_FBUNK(faddr) << lmc->rank_lsb; 499 addr |= LMC_FADR_FROW(faddr) << lmc->row_lsb; 500 addr |= (LMC_FADR_FCOL(faddr) >> 4) << lmc->col_hi_lsb; 501 502 bank = LMC_FADR_FBANK(faddr) << lmc->bank_lsb; 503 504 if (lmc->xor_bank) 505 bank ^= get_bits(addr, 12 + lmc->xbits, lmc->bank_width); 506 507 addr |= bank << lmc->bank_lsb; 508 509 xbits = PCI_FUNC(lmc->pdev->devfn); 510 511 if (lmc->l2c_alias) 512 xbits ^= get_bits(addr, 20, lmc->xbits) ^ 513 get_bits(addr, 12, lmc->xbits); 514 515 addr |= xbits << 7; 516 517 return addr; 518 } 519 520 static unsigned int thunderx_get_num_lmcs(unsigned int node) 521 { 522 unsigned int number = 0; 523 struct pci_dev *pdev = NULL; 524 525 do { 526 pdev = pci_get_device(PCI_VENDOR_ID_CAVIUM, 527 PCI_DEVICE_ID_THUNDER_LMC, 528 pdev); 529 if (pdev) { 530 #ifdef CONFIG_NUMA 531 if (pdev->dev.numa_node == node) 532 number++; 533 #else 534 number++; 535 #endif 536 } 537 } while (pdev); 538 539 return number; 540 } 541 542 #define LMC_MESSAGE_SIZE 120 543 #define LMC_OTHER_SIZE (50 * ARRAY_SIZE(lmc_errors)) 544 545 static irqreturn_t thunderx_lmc_err_isr(int irq, void *dev_id) 546 { 547 struct mem_ctl_info *mci = dev_id; 548 struct thunderx_lmc *lmc = mci->pvt_info; 549 550 unsigned long head = ring_pos(lmc->ring_head, ARRAY_SIZE(lmc->err_ctx)); 551 struct lmc_err_ctx *ctx = &lmc->err_ctx[head]; 552 553 writeq(0, lmc->regs + LMC_CHAR_MASK0); 554 writeq(0, lmc->regs + LMC_CHAR_MASK2); 555 writeq(0x2, lmc->regs + LMC_ECC_PARITY_TEST); 556 557 ctx->reg_int = readq(lmc->regs + LMC_INT); 558 ctx->reg_fadr = readq(lmc->regs + LMC_FADR); 559 ctx->reg_nxm_fadr = readq(lmc->regs + LMC_NXM_FADR); 560 ctx->reg_scram_fadr = readq(lmc->regs + LMC_SCRAM_FADR); 561 ctx->reg_ecc_synd = readq(lmc->regs + LMC_ECC_SYND); 562 563 lmc->ring_head++; 564 565 atomic_set(&lmc->ecc_int, 1); 566 567 /* Clear the interrupt */ 568 writeq(ctx->reg_int, lmc->regs + LMC_INT); 569 570 return IRQ_WAKE_THREAD; 571 } 572 573 static irqreturn_t thunderx_lmc_threaded_isr(int irq, void *dev_id) 574 { 575 struct mem_ctl_info *mci = dev_id; 576 struct thunderx_lmc *lmc = mci->pvt_info; 577 phys_addr_t phys_addr; 578 579 unsigned long tail; 580 struct lmc_err_ctx *ctx; 581 582 irqreturn_t ret = IRQ_NONE; 583 584 char *msg; 585 char *other; 586 587 msg = kmalloc(LMC_MESSAGE_SIZE, GFP_KERNEL); 588 other = kmalloc(LMC_OTHER_SIZE, GFP_KERNEL); 589 590 if (!msg || !other) 591 goto err_free; 592 593 while (CIRC_CNT(lmc->ring_head, lmc->ring_tail, 594 ARRAY_SIZE(lmc->err_ctx))) { 595 tail = ring_pos(lmc->ring_tail, ARRAY_SIZE(lmc->err_ctx)); 596 597 ctx = &lmc->err_ctx[tail]; 598 599 dev_dbg(&lmc->pdev->dev, "LMC_INT: %016llx\n", 600 ctx->reg_int); 601 dev_dbg(&lmc->pdev->dev, "LMC_FADR: %016llx\n", 602 ctx->reg_fadr); 603 dev_dbg(&lmc->pdev->dev, "LMC_NXM_FADR: %016llx\n", 604 ctx->reg_nxm_fadr); 605 dev_dbg(&lmc->pdev->dev, "LMC_SCRAM_FADR: %016llx\n", 606 ctx->reg_scram_fadr); 607 dev_dbg(&lmc->pdev->dev, "LMC_ECC_SYND: %016llx\n", 608 ctx->reg_ecc_synd); 609 610 snprintf(msg, LMC_MESSAGE_SIZE, 611 "DIMM %lld rank %lld bank %lld row %lld col %lld", 612 LMC_FADR_FDIMM(ctx->reg_scram_fadr), 613 LMC_FADR_FBUNK(ctx->reg_scram_fadr), 614 LMC_FADR_FBANK(ctx->reg_scram_fadr), 615 LMC_FADR_FROW(ctx->reg_scram_fadr), 616 LMC_FADR_FCOL(ctx->reg_scram_fadr)); 617 618 decode_register(other, LMC_OTHER_SIZE, lmc_errors, 619 ctx->reg_int); 620 621 phys_addr = thunderx_faddr_to_phys(ctx->reg_fadr, lmc); 622 623 if (ctx->reg_int & LMC_INT_UE) 624 edac_mc_handle_error(HW_EVENT_ERR_UNCORRECTED, mci, 1, 625 phys_to_pfn(phys_addr), 626 offset_in_page(phys_addr), 627 0, -1, -1, -1, msg, other); 628 else if (ctx->reg_int & LMC_INT_CE) 629 edac_mc_handle_error(HW_EVENT_ERR_CORRECTED, mci, 1, 630 phys_to_pfn(phys_addr), 631 offset_in_page(phys_addr), 632 0, -1, -1, -1, msg, other); 633 634 lmc->ring_tail++; 635 } 636 637 ret = IRQ_HANDLED; 638 639 err_free: 640 kfree(msg); 641 kfree(other); 642 643 return ret; 644 } 645 646 static const struct pci_device_id thunderx_lmc_pci_tbl[] = { 647 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_LMC) }, 648 { 0, }, 649 }; 650 651 static inline int pci_dev_to_mc_idx(struct pci_dev *pdev) 652 { 653 int node = dev_to_node(&pdev->dev); 654 int ret = PCI_FUNC(pdev->devfn); 655 656 ret += max(node, 0) << 3; 657 658 return ret; 659 } 660 661 static int thunderx_lmc_probe(struct pci_dev *pdev, 662 const struct pci_device_id *id) 663 { 664 struct thunderx_lmc *lmc; 665 struct edac_mc_layer layer; 666 struct mem_ctl_info *mci; 667 u64 lmc_control, lmc_ddr_pll_ctl, lmc_config; 668 int ret; 669 u64 lmc_int; 670 void *l2c_ioaddr; 671 672 layer.type = EDAC_MC_LAYER_SLOT; 673 layer.size = 2; 674 layer.is_virt_csrow = false; 675 676 ret = pcim_enable_device(pdev); 677 if (ret) { 678 dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); 679 return ret; 680 } 681 682 ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_lmc"); 683 if (ret) { 684 dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); 685 return ret; 686 } 687 688 mci = edac_mc_alloc(pci_dev_to_mc_idx(pdev), 1, &layer, 689 sizeof(struct thunderx_lmc)); 690 if (!mci) 691 return -ENOMEM; 692 693 mci->pdev = &pdev->dev; 694 lmc = mci->pvt_info; 695 696 pci_set_drvdata(pdev, mci); 697 698 lmc->regs = pcim_iomap_table(pdev)[0]; 699 700 lmc_control = readq(lmc->regs + LMC_CONTROL); 701 lmc_ddr_pll_ctl = readq(lmc->regs + LMC_DDR_PLL_CTL); 702 lmc_config = readq(lmc->regs + LMC_CONFIG); 703 704 if (lmc_control & LMC_CONTROL_RDIMM) { 705 mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, 706 lmc_ddr_pll_ctl) ? 707 MEM_RDDR4 : MEM_RDDR3; 708 } else { 709 mci->mtype_cap = FIELD_GET(LMC_DDR_PLL_CTL_DDR4, 710 lmc_ddr_pll_ctl) ? 711 MEM_DDR4 : MEM_DDR3; 712 } 713 714 mci->edac_ctl_cap = EDAC_FLAG_NONE | EDAC_FLAG_SECDED; 715 mci->edac_cap = EDAC_FLAG_SECDED; 716 717 mci->mod_name = "thunderx-lmc"; 718 mci->ctl_name = "thunderx-lmc"; 719 mci->dev_name = dev_name(&pdev->dev); 720 mci->scrub_mode = SCRUB_NONE; 721 722 lmc->pdev = pdev; 723 lmc->msix_ent.entry = 0; 724 725 lmc->ring_head = 0; 726 lmc->ring_tail = 0; 727 728 ret = pci_enable_msix_exact(pdev, &lmc->msix_ent, 1); 729 if (ret) { 730 dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); 731 goto err_free; 732 } 733 734 ret = devm_request_threaded_irq(&pdev->dev, lmc->msix_ent.vector, 735 thunderx_lmc_err_isr, 736 thunderx_lmc_threaded_isr, 0, 737 "[EDAC] ThunderX LMC", mci); 738 if (ret) { 739 dev_err(&pdev->dev, "Cannot set ISR: %d\n", ret); 740 goto err_free; 741 } 742 743 lmc->node = FIELD_GET(THUNDERX_NODE, pci_resource_start(pdev, 0)); 744 745 lmc->xbits = thunderx_get_num_lmcs(lmc->node) >> 1; 746 lmc->bank_width = (FIELD_GET(LMC_DDR_PLL_CTL_DDR4, lmc_ddr_pll_ctl) && 747 FIELD_GET(LMC_CONFIG_BG2, lmc_config)) ? 4 : 3; 748 749 lmc->pbank_lsb = (lmc_config >> 5) & 0xf; 750 lmc->dimm_lsb = 28 + lmc->pbank_lsb + lmc->xbits; 751 lmc->rank_lsb = lmc->dimm_lsb; 752 lmc->rank_lsb -= FIELD_GET(LMC_CONFIG_RANK_ENA, lmc_config) ? 1 : 0; 753 lmc->bank_lsb = 7 + lmc->xbits; 754 lmc->row_lsb = 14 + LMC_CONFIG_ROW_LSB(lmc_config) + lmc->xbits; 755 756 lmc->col_hi_lsb = lmc->bank_lsb + lmc->bank_width; 757 758 lmc->xor_bank = lmc_control & LMC_CONTROL_XOR_BANK; 759 760 l2c_ioaddr = ioremap(L2C_CTL | FIELD_PREP(THUNDERX_NODE, lmc->node), PAGE_SIZE); 761 if (!l2c_ioaddr) { 762 dev_err(&pdev->dev, "Cannot map L2C_CTL\n"); 763 ret = -ENOMEM; 764 goto err_free; 765 } 766 767 lmc->l2c_alias = !(readq(l2c_ioaddr) & L2C_CTL_DISIDXALIAS); 768 769 iounmap(l2c_ioaddr); 770 771 ret = edac_mc_add_mc(mci); 772 if (ret) { 773 dev_err(&pdev->dev, "Cannot add the MC: %d\n", ret); 774 goto err_free; 775 } 776 777 lmc_int = readq(lmc->regs + LMC_INT); 778 writeq(lmc_int, lmc->regs + LMC_INT); 779 780 writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1S); 781 782 if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { 783 ret = thunderx_create_debugfs_nodes(mci->debugfs, 784 lmc_dfs_ents, 785 lmc, 786 ARRAY_SIZE(lmc_dfs_ents)); 787 788 if (ret != ARRAY_SIZE(lmc_dfs_ents)) { 789 dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", 790 ret, ret >= 0 ? " created" : ""); 791 } 792 } 793 794 return 0; 795 796 err_free: 797 pci_set_drvdata(pdev, NULL); 798 edac_mc_free(mci); 799 800 return ret; 801 } 802 803 static void thunderx_lmc_remove(struct pci_dev *pdev) 804 { 805 struct mem_ctl_info *mci = pci_get_drvdata(pdev); 806 struct thunderx_lmc *lmc = mci->pvt_info; 807 808 writeq(LMC_INT_ENA_ALL, lmc->regs + LMC_INT_ENA_W1C); 809 810 edac_mc_del_mc(&pdev->dev); 811 edac_mc_free(mci); 812 } 813 814 MODULE_DEVICE_TABLE(pci, thunderx_lmc_pci_tbl); 815 816 static struct pci_driver thunderx_lmc_driver = { 817 .name = "thunderx_lmc_edac", 818 .probe = thunderx_lmc_probe, 819 .remove = thunderx_lmc_remove, 820 .id_table = thunderx_lmc_pci_tbl, 821 }; 822 823 /*---------------------- OCX driver ---------------------------------*/ 824 825 #define PCI_DEVICE_ID_THUNDER_OCX 0xa013 826 827 #define OCX_LINK_INTS 3 828 #define OCX_INTS (OCX_LINK_INTS + 1) 829 #define OCX_RX_LANES 24 830 #define OCX_RX_LANE_STATS 15 831 832 #define OCX_COM_INT 0x100 833 #define OCX_COM_INT_W1S 0x108 834 #define OCX_COM_INT_ENA_W1S 0x110 835 #define OCX_COM_INT_ENA_W1C 0x118 836 837 #define OCX_COM_IO_BADID BIT(54) 838 #define OCX_COM_MEM_BADID BIT(53) 839 #define OCX_COM_COPR_BADID BIT(52) 840 #define OCX_COM_WIN_REQ_BADID BIT(51) 841 #define OCX_COM_WIN_REQ_TOUT BIT(50) 842 #define OCX_COM_RX_LANE GENMASK(23, 0) 843 844 #define OCX_COM_INT_CE (OCX_COM_IO_BADID | \ 845 OCX_COM_MEM_BADID | \ 846 OCX_COM_COPR_BADID | \ 847 OCX_COM_WIN_REQ_BADID | \ 848 OCX_COM_WIN_REQ_TOUT) 849 850 static const struct error_descr ocx_com_errors[] = { 851 { 852 .type = ERR_CORRECTED, 853 .mask = OCX_COM_IO_BADID, 854 .descr = "Invalid IO transaction node ID", 855 }, 856 { 857 .type = ERR_CORRECTED, 858 .mask = OCX_COM_MEM_BADID, 859 .descr = "Invalid memory transaction node ID", 860 }, 861 { 862 .type = ERR_CORRECTED, 863 .mask = OCX_COM_COPR_BADID, 864 .descr = "Invalid coprocessor transaction node ID", 865 }, 866 { 867 .type = ERR_CORRECTED, 868 .mask = OCX_COM_WIN_REQ_BADID, 869 .descr = "Invalid SLI transaction node ID", 870 }, 871 { 872 .type = ERR_CORRECTED, 873 .mask = OCX_COM_WIN_REQ_TOUT, 874 .descr = "Window/core request timeout", 875 }, 876 {0, 0, NULL}, 877 }; 878 879 #define OCX_COM_LINKX_INT(x) (0x120 + (x) * 8) 880 #define OCX_COM_LINKX_INT_W1S(x) (0x140 + (x) * 8) 881 #define OCX_COM_LINKX_INT_ENA_W1S(x) (0x160 + (x) * 8) 882 #define OCX_COM_LINKX_INT_ENA_W1C(x) (0x180 + (x) * 8) 883 884 #define OCX_COM_LINK_BAD_WORD BIT(13) 885 #define OCX_COM_LINK_ALIGN_FAIL BIT(12) 886 #define OCX_COM_LINK_ALIGN_DONE BIT(11) 887 #define OCX_COM_LINK_UP BIT(10) 888 #define OCX_COM_LINK_STOP BIT(9) 889 #define OCX_COM_LINK_BLK_ERR BIT(8) 890 #define OCX_COM_LINK_REINIT BIT(7) 891 #define OCX_COM_LINK_LNK_DATA BIT(6) 892 #define OCX_COM_LINK_RXFIFO_DBE BIT(5) 893 #define OCX_COM_LINK_RXFIFO_SBE BIT(4) 894 #define OCX_COM_LINK_TXFIFO_DBE BIT(3) 895 #define OCX_COM_LINK_TXFIFO_SBE BIT(2) 896 #define OCX_COM_LINK_REPLAY_DBE BIT(1) 897 #define OCX_COM_LINK_REPLAY_SBE BIT(0) 898 899 static const struct error_descr ocx_com_link_errors[] = { 900 { 901 .type = ERR_CORRECTED, 902 .mask = OCX_COM_LINK_REPLAY_SBE, 903 .descr = "Replay buffer single-bit error", 904 }, 905 { 906 .type = ERR_CORRECTED, 907 .mask = OCX_COM_LINK_TXFIFO_SBE, 908 .descr = "TX FIFO single-bit error", 909 }, 910 { 911 .type = ERR_CORRECTED, 912 .mask = OCX_COM_LINK_RXFIFO_SBE, 913 .descr = "RX FIFO single-bit error", 914 }, 915 { 916 .type = ERR_CORRECTED, 917 .mask = OCX_COM_LINK_BLK_ERR, 918 .descr = "Block code error", 919 }, 920 { 921 .type = ERR_CORRECTED, 922 .mask = OCX_COM_LINK_ALIGN_FAIL, 923 .descr = "Link alignment failure", 924 }, 925 { 926 .type = ERR_CORRECTED, 927 .mask = OCX_COM_LINK_BAD_WORD, 928 .descr = "Bad code word", 929 }, 930 { 931 .type = ERR_UNCORRECTED, 932 .mask = OCX_COM_LINK_REPLAY_DBE, 933 .descr = "Replay buffer double-bit error", 934 }, 935 { 936 .type = ERR_UNCORRECTED, 937 .mask = OCX_COM_LINK_TXFIFO_DBE, 938 .descr = "TX FIFO double-bit error", 939 }, 940 { 941 .type = ERR_UNCORRECTED, 942 .mask = OCX_COM_LINK_RXFIFO_DBE, 943 .descr = "RX FIFO double-bit error", 944 }, 945 { 946 .type = ERR_UNCORRECTED, 947 .mask = OCX_COM_LINK_STOP, 948 .descr = "Link stopped", 949 }, 950 {0, 0, NULL}, 951 }; 952 953 #define OCX_COM_LINK_INT_UE (OCX_COM_LINK_REPLAY_DBE | \ 954 OCX_COM_LINK_TXFIFO_DBE | \ 955 OCX_COM_LINK_RXFIFO_DBE | \ 956 OCX_COM_LINK_STOP) 957 958 #define OCX_COM_LINK_INT_CE (OCX_COM_LINK_REPLAY_SBE | \ 959 OCX_COM_LINK_TXFIFO_SBE | \ 960 OCX_COM_LINK_RXFIFO_SBE | \ 961 OCX_COM_LINK_BLK_ERR | \ 962 OCX_COM_LINK_ALIGN_FAIL | \ 963 OCX_COM_LINK_BAD_WORD) 964 965 #define OCX_LNE_INT(x) (0x8018 + (x) * 0x100) 966 #define OCX_LNE_INT_EN(x) (0x8020 + (x) * 0x100) 967 #define OCX_LNE_BAD_CNT(x) (0x8028 + (x) * 0x100) 968 #define OCX_LNE_CFG(x) (0x8000 + (x) * 0x100) 969 #define OCX_LNE_STAT(x, y) (0x8040 + (x) * 0x100 + (y) * 8) 970 971 #define OCX_LNE_CFG_RX_BDRY_LOCK_DIS BIT(8) 972 #define OCX_LNE_CFG_RX_STAT_WRAP_DIS BIT(2) 973 #define OCX_LNE_CFG_RX_STAT_RDCLR BIT(1) 974 #define OCX_LNE_CFG_RX_STAT_ENA BIT(0) 975 976 977 #define OCX_LANE_BAD_64B67B BIT(8) 978 #define OCX_LANE_DSKEW_FIFO_OVFL BIT(5) 979 #define OCX_LANE_SCRM_SYNC_LOSS BIT(4) 980 #define OCX_LANE_UKWN_CNTL_WORD BIT(3) 981 #define OCX_LANE_CRC32_ERR BIT(2) 982 #define OCX_LANE_BDRY_SYNC_LOSS BIT(1) 983 #define OCX_LANE_SERDES_LOCK_LOSS BIT(0) 984 985 #define OCX_COM_LANE_INT_UE (0) 986 #define OCX_COM_LANE_INT_CE (OCX_LANE_SERDES_LOCK_LOSS | \ 987 OCX_LANE_BDRY_SYNC_LOSS | \ 988 OCX_LANE_CRC32_ERR | \ 989 OCX_LANE_UKWN_CNTL_WORD | \ 990 OCX_LANE_SCRM_SYNC_LOSS | \ 991 OCX_LANE_DSKEW_FIFO_OVFL | \ 992 OCX_LANE_BAD_64B67B) 993 994 static const struct error_descr ocx_lane_errors[] = { 995 { 996 .type = ERR_CORRECTED, 997 .mask = OCX_LANE_SERDES_LOCK_LOSS, 998 .descr = "RX SerDes lock lost", 999 }, 1000 { 1001 .type = ERR_CORRECTED, 1002 .mask = OCX_LANE_BDRY_SYNC_LOSS, 1003 .descr = "RX word boundary lost", 1004 }, 1005 { 1006 .type = ERR_CORRECTED, 1007 .mask = OCX_LANE_CRC32_ERR, 1008 .descr = "CRC32 error", 1009 }, 1010 { 1011 .type = ERR_CORRECTED, 1012 .mask = OCX_LANE_UKWN_CNTL_WORD, 1013 .descr = "Unknown control word", 1014 }, 1015 { 1016 .type = ERR_CORRECTED, 1017 .mask = OCX_LANE_SCRM_SYNC_LOSS, 1018 .descr = "Scrambler synchronization lost", 1019 }, 1020 { 1021 .type = ERR_CORRECTED, 1022 .mask = OCX_LANE_DSKEW_FIFO_OVFL, 1023 .descr = "RX deskew FIFO overflow", 1024 }, 1025 { 1026 .type = ERR_CORRECTED, 1027 .mask = OCX_LANE_BAD_64B67B, 1028 .descr = "Bad 64B/67B codeword", 1029 }, 1030 {0, 0, NULL}, 1031 }; 1032 1033 #define OCX_LNE_INT_ENA_ALL (GENMASK(9, 8) | GENMASK(6, 0)) 1034 #define OCX_COM_INT_ENA_ALL (GENMASK(54, 50) | GENMASK(23, 0)) 1035 #define OCX_COM_LINKX_INT_ENA_ALL (GENMASK(13, 12) | \ 1036 GENMASK(9, 7) | GENMASK(5, 0)) 1037 1038 #define OCX_TLKX_ECC_CTL(x) (0x10018 + (x) * 0x2000) 1039 #define OCX_RLKX_ECC_CTL(x) (0x18018 + (x) * 0x2000) 1040 1041 struct ocx_com_err_ctx { 1042 u64 reg_com_int; 1043 u64 reg_lane_int[OCX_RX_LANES]; 1044 u64 reg_lane_stat11[OCX_RX_LANES]; 1045 }; 1046 1047 struct ocx_link_err_ctx { 1048 u64 reg_com_link_int; 1049 int link; 1050 }; 1051 1052 struct thunderx_ocx { 1053 void __iomem *regs; 1054 int com_link; 1055 struct pci_dev *pdev; 1056 struct edac_device_ctl_info *edac_dev; 1057 1058 struct dentry *debugfs; 1059 struct msix_entry msix_ent[OCX_INTS]; 1060 1061 struct ocx_com_err_ctx com_err_ctx[RING_ENTRIES]; 1062 struct ocx_link_err_ctx link_err_ctx[RING_ENTRIES]; 1063 1064 unsigned long com_ring_head; 1065 unsigned long com_ring_tail; 1066 1067 unsigned long link_ring_head; 1068 unsigned long link_ring_tail; 1069 }; 1070 1071 #define OCX_MESSAGE_SIZE SZ_1K 1072 #define OCX_OTHER_SIZE (50 * ARRAY_SIZE(ocx_com_link_errors)) 1073 1074 /* This handler is threaded */ 1075 static irqreturn_t thunderx_ocx_com_isr(int irq, void *irq_id) 1076 { 1077 struct msix_entry *msix = irq_id; 1078 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, 1079 msix_ent[msix->entry]); 1080 1081 int lane; 1082 unsigned long head = ring_pos(ocx->com_ring_head, 1083 ARRAY_SIZE(ocx->com_err_ctx)); 1084 struct ocx_com_err_ctx *ctx = &ocx->com_err_ctx[head]; 1085 1086 ctx->reg_com_int = readq(ocx->regs + OCX_COM_INT); 1087 1088 for (lane = 0; lane < OCX_RX_LANES; lane++) { 1089 ctx->reg_lane_int[lane] = 1090 readq(ocx->regs + OCX_LNE_INT(lane)); 1091 ctx->reg_lane_stat11[lane] = 1092 readq(ocx->regs + OCX_LNE_STAT(lane, 11)); 1093 1094 writeq(ctx->reg_lane_int[lane], ocx->regs + OCX_LNE_INT(lane)); 1095 } 1096 1097 writeq(ctx->reg_com_int, ocx->regs + OCX_COM_INT); 1098 1099 ocx->com_ring_head++; 1100 1101 return IRQ_WAKE_THREAD; 1102 } 1103 1104 static irqreturn_t thunderx_ocx_com_threaded_isr(int irq, void *irq_id) 1105 { 1106 struct msix_entry *msix = irq_id; 1107 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, 1108 msix_ent[msix->entry]); 1109 1110 irqreturn_t ret = IRQ_NONE; 1111 1112 unsigned long tail; 1113 struct ocx_com_err_ctx *ctx; 1114 int lane; 1115 char *msg; 1116 char *other; 1117 1118 msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); 1119 other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); 1120 1121 if (!msg || !other) 1122 goto err_free; 1123 1124 while (CIRC_CNT(ocx->com_ring_head, ocx->com_ring_tail, 1125 ARRAY_SIZE(ocx->com_err_ctx))) { 1126 tail = ring_pos(ocx->com_ring_tail, 1127 ARRAY_SIZE(ocx->com_err_ctx)); 1128 ctx = &ocx->com_err_ctx[tail]; 1129 1130 snprintf(msg, OCX_MESSAGE_SIZE, "%s: OCX_COM_INT: %016llx", 1131 ocx->edac_dev->ctl_name, ctx->reg_com_int); 1132 1133 decode_register(other, OCX_OTHER_SIZE, 1134 ocx_com_errors, ctx->reg_com_int); 1135 1136 strlcat(msg, other, OCX_MESSAGE_SIZE); 1137 1138 for (lane = 0; lane < OCX_RX_LANES; lane++) 1139 if (ctx->reg_com_int & BIT(lane)) { 1140 snprintf(other, OCX_OTHER_SIZE, 1141 "\n\tOCX_LNE_INT[%02d]: %016llx OCX_LNE_STAT11[%02d]: %016llx", 1142 lane, ctx->reg_lane_int[lane], 1143 lane, ctx->reg_lane_stat11[lane]); 1144 1145 strlcat(msg, other, OCX_MESSAGE_SIZE); 1146 1147 decode_register(other, OCX_OTHER_SIZE, 1148 ocx_lane_errors, 1149 ctx->reg_lane_int[lane]); 1150 strlcat(msg, other, OCX_MESSAGE_SIZE); 1151 } 1152 1153 if (ctx->reg_com_int & OCX_COM_INT_CE) 1154 edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); 1155 1156 ocx->com_ring_tail++; 1157 } 1158 1159 ret = IRQ_HANDLED; 1160 1161 err_free: 1162 kfree(other); 1163 kfree(msg); 1164 1165 return ret; 1166 } 1167 1168 static irqreturn_t thunderx_ocx_lnk_isr(int irq, void *irq_id) 1169 { 1170 struct msix_entry *msix = irq_id; 1171 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, 1172 msix_ent[msix->entry]); 1173 unsigned long head = ring_pos(ocx->link_ring_head, 1174 ARRAY_SIZE(ocx->link_err_ctx)); 1175 struct ocx_link_err_ctx *ctx = &ocx->link_err_ctx[head]; 1176 1177 ctx->link = msix->entry; 1178 ctx->reg_com_link_int = readq(ocx->regs + OCX_COM_LINKX_INT(ctx->link)); 1179 1180 writeq(ctx->reg_com_link_int, ocx->regs + OCX_COM_LINKX_INT(ctx->link)); 1181 1182 ocx->link_ring_head++; 1183 1184 return IRQ_WAKE_THREAD; 1185 } 1186 1187 static irqreturn_t thunderx_ocx_lnk_threaded_isr(int irq, void *irq_id) 1188 { 1189 struct msix_entry *msix = irq_id; 1190 struct thunderx_ocx *ocx = container_of(msix, struct thunderx_ocx, 1191 msix_ent[msix->entry]); 1192 irqreturn_t ret = IRQ_NONE; 1193 unsigned long tail; 1194 struct ocx_link_err_ctx *ctx; 1195 1196 char *msg; 1197 char *other; 1198 1199 msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); 1200 other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); 1201 1202 if (!msg || !other) 1203 goto err_free; 1204 1205 while (CIRC_CNT(ocx->link_ring_head, ocx->link_ring_tail, 1206 ARRAY_SIZE(ocx->link_err_ctx))) { 1207 tail = ring_pos(ocx->link_ring_head, 1208 ARRAY_SIZE(ocx->link_err_ctx)); 1209 1210 ctx = &ocx->link_err_ctx[tail]; 1211 1212 snprintf(msg, OCX_MESSAGE_SIZE, 1213 "%s: OCX_COM_LINK_INT[%d]: %016llx", 1214 ocx->edac_dev->ctl_name, 1215 ctx->link, ctx->reg_com_link_int); 1216 1217 decode_register(other, OCX_OTHER_SIZE, 1218 ocx_com_link_errors, ctx->reg_com_link_int); 1219 1220 strlcat(msg, other, OCX_MESSAGE_SIZE); 1221 1222 if (ctx->reg_com_link_int & OCX_COM_LINK_INT_UE) 1223 edac_device_handle_ue(ocx->edac_dev, 0, 0, msg); 1224 else if (ctx->reg_com_link_int & OCX_COM_LINK_INT_CE) 1225 edac_device_handle_ce(ocx->edac_dev, 0, 0, msg); 1226 1227 ocx->link_ring_tail++; 1228 } 1229 1230 ret = IRQ_HANDLED; 1231 err_free: 1232 kfree(other); 1233 kfree(msg); 1234 1235 return ret; 1236 } 1237 1238 #define OCX_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(ocx, _name, _reg) 1239 1240 OCX_DEBUGFS_ATTR(tlk0_ecc_ctl, OCX_TLKX_ECC_CTL(0)); 1241 OCX_DEBUGFS_ATTR(tlk1_ecc_ctl, OCX_TLKX_ECC_CTL(1)); 1242 OCX_DEBUGFS_ATTR(tlk2_ecc_ctl, OCX_TLKX_ECC_CTL(2)); 1243 1244 OCX_DEBUGFS_ATTR(rlk0_ecc_ctl, OCX_RLKX_ECC_CTL(0)); 1245 OCX_DEBUGFS_ATTR(rlk1_ecc_ctl, OCX_RLKX_ECC_CTL(1)); 1246 OCX_DEBUGFS_ATTR(rlk2_ecc_ctl, OCX_RLKX_ECC_CTL(2)); 1247 1248 OCX_DEBUGFS_ATTR(com_link0_int, OCX_COM_LINKX_INT_W1S(0)); 1249 OCX_DEBUGFS_ATTR(com_link1_int, OCX_COM_LINKX_INT_W1S(1)); 1250 OCX_DEBUGFS_ATTR(com_link2_int, OCX_COM_LINKX_INT_W1S(2)); 1251 1252 OCX_DEBUGFS_ATTR(lne00_badcnt, OCX_LNE_BAD_CNT(0)); 1253 OCX_DEBUGFS_ATTR(lne01_badcnt, OCX_LNE_BAD_CNT(1)); 1254 OCX_DEBUGFS_ATTR(lne02_badcnt, OCX_LNE_BAD_CNT(2)); 1255 OCX_DEBUGFS_ATTR(lne03_badcnt, OCX_LNE_BAD_CNT(3)); 1256 OCX_DEBUGFS_ATTR(lne04_badcnt, OCX_LNE_BAD_CNT(4)); 1257 OCX_DEBUGFS_ATTR(lne05_badcnt, OCX_LNE_BAD_CNT(5)); 1258 OCX_DEBUGFS_ATTR(lne06_badcnt, OCX_LNE_BAD_CNT(6)); 1259 OCX_DEBUGFS_ATTR(lne07_badcnt, OCX_LNE_BAD_CNT(7)); 1260 1261 OCX_DEBUGFS_ATTR(lne08_badcnt, OCX_LNE_BAD_CNT(8)); 1262 OCX_DEBUGFS_ATTR(lne09_badcnt, OCX_LNE_BAD_CNT(9)); 1263 OCX_DEBUGFS_ATTR(lne10_badcnt, OCX_LNE_BAD_CNT(10)); 1264 OCX_DEBUGFS_ATTR(lne11_badcnt, OCX_LNE_BAD_CNT(11)); 1265 OCX_DEBUGFS_ATTR(lne12_badcnt, OCX_LNE_BAD_CNT(12)); 1266 OCX_DEBUGFS_ATTR(lne13_badcnt, OCX_LNE_BAD_CNT(13)); 1267 OCX_DEBUGFS_ATTR(lne14_badcnt, OCX_LNE_BAD_CNT(14)); 1268 OCX_DEBUGFS_ATTR(lne15_badcnt, OCX_LNE_BAD_CNT(15)); 1269 1270 OCX_DEBUGFS_ATTR(lne16_badcnt, OCX_LNE_BAD_CNT(16)); 1271 OCX_DEBUGFS_ATTR(lne17_badcnt, OCX_LNE_BAD_CNT(17)); 1272 OCX_DEBUGFS_ATTR(lne18_badcnt, OCX_LNE_BAD_CNT(18)); 1273 OCX_DEBUGFS_ATTR(lne19_badcnt, OCX_LNE_BAD_CNT(19)); 1274 OCX_DEBUGFS_ATTR(lne20_badcnt, OCX_LNE_BAD_CNT(20)); 1275 OCX_DEBUGFS_ATTR(lne21_badcnt, OCX_LNE_BAD_CNT(21)); 1276 OCX_DEBUGFS_ATTR(lne22_badcnt, OCX_LNE_BAD_CNT(22)); 1277 OCX_DEBUGFS_ATTR(lne23_badcnt, OCX_LNE_BAD_CNT(23)); 1278 1279 OCX_DEBUGFS_ATTR(com_int, OCX_COM_INT_W1S); 1280 1281 static struct debugfs_entry *ocx_dfs_ents[] = { 1282 &debugfs_tlk0_ecc_ctl, 1283 &debugfs_tlk1_ecc_ctl, 1284 &debugfs_tlk2_ecc_ctl, 1285 1286 &debugfs_rlk0_ecc_ctl, 1287 &debugfs_rlk1_ecc_ctl, 1288 &debugfs_rlk2_ecc_ctl, 1289 1290 &debugfs_com_link0_int, 1291 &debugfs_com_link1_int, 1292 &debugfs_com_link2_int, 1293 1294 &debugfs_lne00_badcnt, 1295 &debugfs_lne01_badcnt, 1296 &debugfs_lne02_badcnt, 1297 &debugfs_lne03_badcnt, 1298 &debugfs_lne04_badcnt, 1299 &debugfs_lne05_badcnt, 1300 &debugfs_lne06_badcnt, 1301 &debugfs_lne07_badcnt, 1302 &debugfs_lne08_badcnt, 1303 &debugfs_lne09_badcnt, 1304 &debugfs_lne10_badcnt, 1305 &debugfs_lne11_badcnt, 1306 &debugfs_lne12_badcnt, 1307 &debugfs_lne13_badcnt, 1308 &debugfs_lne14_badcnt, 1309 &debugfs_lne15_badcnt, 1310 &debugfs_lne16_badcnt, 1311 &debugfs_lne17_badcnt, 1312 &debugfs_lne18_badcnt, 1313 &debugfs_lne19_badcnt, 1314 &debugfs_lne20_badcnt, 1315 &debugfs_lne21_badcnt, 1316 &debugfs_lne22_badcnt, 1317 &debugfs_lne23_badcnt, 1318 1319 &debugfs_com_int, 1320 }; 1321 1322 static const struct pci_device_id thunderx_ocx_pci_tbl[] = { 1323 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_OCX) }, 1324 { 0, }, 1325 }; 1326 1327 static void thunderx_ocx_clearstats(struct thunderx_ocx *ocx) 1328 { 1329 int lane, stat, cfg; 1330 1331 for (lane = 0; lane < OCX_RX_LANES; lane++) { 1332 cfg = readq(ocx->regs + OCX_LNE_CFG(lane)); 1333 cfg |= OCX_LNE_CFG_RX_STAT_RDCLR; 1334 cfg &= ~OCX_LNE_CFG_RX_STAT_ENA; 1335 writeq(cfg, ocx->regs + OCX_LNE_CFG(lane)); 1336 1337 for (stat = 0; stat < OCX_RX_LANE_STATS; stat++) 1338 readq(ocx->regs + OCX_LNE_STAT(lane, stat)); 1339 } 1340 } 1341 1342 static int thunderx_ocx_probe(struct pci_dev *pdev, 1343 const struct pci_device_id *id) 1344 { 1345 struct thunderx_ocx *ocx; 1346 struct edac_device_ctl_info *edac_dev; 1347 char name[32]; 1348 int idx; 1349 int i; 1350 int ret; 1351 u64 reg; 1352 1353 ret = pcim_enable_device(pdev); 1354 if (ret) { 1355 dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); 1356 return ret; 1357 } 1358 1359 ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_ocx"); 1360 if (ret) { 1361 dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); 1362 return ret; 1363 } 1364 1365 idx = edac_device_alloc_index(); 1366 snprintf(name, sizeof(name), "OCX%d", idx); 1367 edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_ocx), 1368 name, 1, "CCPI", 1, 1369 0, NULL, 0, idx); 1370 if (!edac_dev) { 1371 dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); 1372 return -ENOMEM; 1373 } 1374 ocx = edac_dev->pvt_info; 1375 ocx->edac_dev = edac_dev; 1376 ocx->com_ring_head = 0; 1377 ocx->com_ring_tail = 0; 1378 ocx->link_ring_head = 0; 1379 ocx->link_ring_tail = 0; 1380 1381 ocx->regs = pcim_iomap_table(pdev)[0]; 1382 if (!ocx->regs) { 1383 dev_err(&pdev->dev, "Cannot map PCI resources\n"); 1384 ret = -ENODEV; 1385 goto err_free; 1386 } 1387 1388 ocx->pdev = pdev; 1389 1390 for (i = 0; i < OCX_INTS; i++) { 1391 ocx->msix_ent[i].entry = i; 1392 ocx->msix_ent[i].vector = 0; 1393 } 1394 1395 ret = pci_enable_msix_exact(pdev, ocx->msix_ent, OCX_INTS); 1396 if (ret) { 1397 dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); 1398 goto err_free; 1399 } 1400 1401 for (i = 0; i < OCX_INTS; i++) { 1402 ret = devm_request_threaded_irq(&pdev->dev, 1403 ocx->msix_ent[i].vector, 1404 (i == 3) ? 1405 thunderx_ocx_com_isr : 1406 thunderx_ocx_lnk_isr, 1407 (i == 3) ? 1408 thunderx_ocx_com_threaded_isr : 1409 thunderx_ocx_lnk_threaded_isr, 1410 0, "[EDAC] ThunderX OCX", 1411 &ocx->msix_ent[i]); 1412 if (ret) 1413 goto err_free; 1414 } 1415 1416 edac_dev->dev = &pdev->dev; 1417 edac_dev->dev_name = dev_name(&pdev->dev); 1418 edac_dev->mod_name = "thunderx-ocx"; 1419 edac_dev->ctl_name = "thunderx-ocx"; 1420 1421 ret = edac_device_add_device(edac_dev); 1422 if (ret) { 1423 dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); 1424 goto err_free; 1425 } 1426 1427 if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { 1428 ocx->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); 1429 1430 ret = thunderx_create_debugfs_nodes(ocx->debugfs, 1431 ocx_dfs_ents, 1432 ocx, 1433 ARRAY_SIZE(ocx_dfs_ents)); 1434 if (ret != ARRAY_SIZE(ocx_dfs_ents)) { 1435 dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", 1436 ret, ret >= 0 ? " created" : ""); 1437 } 1438 } 1439 1440 pci_set_drvdata(pdev, edac_dev); 1441 1442 thunderx_ocx_clearstats(ocx); 1443 1444 for (i = 0; i < OCX_RX_LANES; i++) { 1445 writeq(OCX_LNE_INT_ENA_ALL, 1446 ocx->regs + OCX_LNE_INT_EN(i)); 1447 1448 reg = readq(ocx->regs + OCX_LNE_INT(i)); 1449 writeq(reg, ocx->regs + OCX_LNE_INT(i)); 1450 1451 } 1452 1453 for (i = 0; i < OCX_LINK_INTS; i++) { 1454 reg = readq(ocx->regs + OCX_COM_LINKX_INT(i)); 1455 writeq(reg, ocx->regs + OCX_COM_LINKX_INT(i)); 1456 1457 writeq(OCX_COM_LINKX_INT_ENA_ALL, 1458 ocx->regs + OCX_COM_LINKX_INT_ENA_W1S(i)); 1459 } 1460 1461 reg = readq(ocx->regs + OCX_COM_INT); 1462 writeq(reg, ocx->regs + OCX_COM_INT); 1463 1464 writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1S); 1465 1466 return 0; 1467 err_free: 1468 edac_device_free_ctl_info(edac_dev); 1469 1470 return ret; 1471 } 1472 1473 static void thunderx_ocx_remove(struct pci_dev *pdev) 1474 { 1475 struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); 1476 struct thunderx_ocx *ocx = edac_dev->pvt_info; 1477 int i; 1478 1479 writeq(OCX_COM_INT_ENA_ALL, ocx->regs + OCX_COM_INT_ENA_W1C); 1480 1481 for (i = 0; i < OCX_INTS; i++) { 1482 writeq(OCX_COM_LINKX_INT_ENA_ALL, 1483 ocx->regs + OCX_COM_LINKX_INT_ENA_W1C(i)); 1484 } 1485 1486 edac_debugfs_remove_recursive(ocx->debugfs); 1487 1488 edac_device_del_device(&pdev->dev); 1489 edac_device_free_ctl_info(edac_dev); 1490 } 1491 1492 MODULE_DEVICE_TABLE(pci, thunderx_ocx_pci_tbl); 1493 1494 static struct pci_driver thunderx_ocx_driver = { 1495 .name = "thunderx_ocx_edac", 1496 .probe = thunderx_ocx_probe, 1497 .remove = thunderx_ocx_remove, 1498 .id_table = thunderx_ocx_pci_tbl, 1499 }; 1500 1501 /*---------------------- L2C driver ---------------------------------*/ 1502 1503 #define PCI_DEVICE_ID_THUNDER_L2C_TAD 0xa02e 1504 #define PCI_DEVICE_ID_THUNDER_L2C_CBC 0xa02f 1505 #define PCI_DEVICE_ID_THUNDER_L2C_MCI 0xa030 1506 1507 #define L2C_TAD_INT_W1C 0x40000 1508 #define L2C_TAD_INT_W1S 0x40008 1509 1510 #define L2C_TAD_INT_ENA_W1C 0x40020 1511 #define L2C_TAD_INT_ENA_W1S 0x40028 1512 1513 1514 #define L2C_TAD_INT_L2DDBE BIT(1) 1515 #define L2C_TAD_INT_SBFSBE BIT(2) 1516 #define L2C_TAD_INT_SBFDBE BIT(3) 1517 #define L2C_TAD_INT_FBFSBE BIT(4) 1518 #define L2C_TAD_INT_FBFDBE BIT(5) 1519 #define L2C_TAD_INT_TAGDBE BIT(9) 1520 #define L2C_TAD_INT_RDDISLMC BIT(15) 1521 #define L2C_TAD_INT_WRDISLMC BIT(16) 1522 #define L2C_TAD_INT_LFBTO BIT(17) 1523 #define L2C_TAD_INT_GSYNCTO BIT(18) 1524 #define L2C_TAD_INT_RTGSBE BIT(32) 1525 #define L2C_TAD_INT_RTGDBE BIT(33) 1526 #define L2C_TAD_INT_RDDISOCI BIT(34) 1527 #define L2C_TAD_INT_WRDISOCI BIT(35) 1528 1529 #define L2C_TAD_INT_ECC (L2C_TAD_INT_L2DDBE | \ 1530 L2C_TAD_INT_SBFSBE | L2C_TAD_INT_SBFDBE | \ 1531 L2C_TAD_INT_FBFSBE | L2C_TAD_INT_FBFDBE) 1532 1533 #define L2C_TAD_INT_CE (L2C_TAD_INT_SBFSBE | \ 1534 L2C_TAD_INT_FBFSBE) 1535 1536 #define L2C_TAD_INT_UE (L2C_TAD_INT_L2DDBE | \ 1537 L2C_TAD_INT_SBFDBE | \ 1538 L2C_TAD_INT_FBFDBE | \ 1539 L2C_TAD_INT_TAGDBE | \ 1540 L2C_TAD_INT_RTGDBE | \ 1541 L2C_TAD_INT_WRDISOCI | \ 1542 L2C_TAD_INT_RDDISOCI | \ 1543 L2C_TAD_INT_WRDISLMC | \ 1544 L2C_TAD_INT_RDDISLMC | \ 1545 L2C_TAD_INT_LFBTO | \ 1546 L2C_TAD_INT_GSYNCTO) 1547 1548 static const struct error_descr l2_tad_errors[] = { 1549 { 1550 .type = ERR_CORRECTED, 1551 .mask = L2C_TAD_INT_SBFSBE, 1552 .descr = "SBF single-bit error", 1553 }, 1554 { 1555 .type = ERR_CORRECTED, 1556 .mask = L2C_TAD_INT_FBFSBE, 1557 .descr = "FBF single-bit error", 1558 }, 1559 { 1560 .type = ERR_UNCORRECTED, 1561 .mask = L2C_TAD_INT_L2DDBE, 1562 .descr = "L2D double-bit error", 1563 }, 1564 { 1565 .type = ERR_UNCORRECTED, 1566 .mask = L2C_TAD_INT_SBFDBE, 1567 .descr = "SBF double-bit error", 1568 }, 1569 { 1570 .type = ERR_UNCORRECTED, 1571 .mask = L2C_TAD_INT_FBFDBE, 1572 .descr = "FBF double-bit error", 1573 }, 1574 { 1575 .type = ERR_UNCORRECTED, 1576 .mask = L2C_TAD_INT_TAGDBE, 1577 .descr = "TAG double-bit error", 1578 }, 1579 { 1580 .type = ERR_UNCORRECTED, 1581 .mask = L2C_TAD_INT_RTGDBE, 1582 .descr = "RTG double-bit error", 1583 }, 1584 { 1585 .type = ERR_UNCORRECTED, 1586 .mask = L2C_TAD_INT_WRDISOCI, 1587 .descr = "Write to a disabled CCPI", 1588 }, 1589 { 1590 .type = ERR_UNCORRECTED, 1591 .mask = L2C_TAD_INT_RDDISOCI, 1592 .descr = "Read from a disabled CCPI", 1593 }, 1594 { 1595 .type = ERR_UNCORRECTED, 1596 .mask = L2C_TAD_INT_WRDISLMC, 1597 .descr = "Write to a disabled LMC", 1598 }, 1599 { 1600 .type = ERR_UNCORRECTED, 1601 .mask = L2C_TAD_INT_RDDISLMC, 1602 .descr = "Read from a disabled LMC", 1603 }, 1604 { 1605 .type = ERR_UNCORRECTED, 1606 .mask = L2C_TAD_INT_LFBTO, 1607 .descr = "LFB entry timeout", 1608 }, 1609 { 1610 .type = ERR_UNCORRECTED, 1611 .mask = L2C_TAD_INT_GSYNCTO, 1612 .descr = "Global sync CCPI timeout", 1613 }, 1614 {0, 0, NULL}, 1615 }; 1616 1617 #define L2C_TAD_INT_TAG (L2C_TAD_INT_TAGDBE) 1618 1619 #define L2C_TAD_INT_RTG (L2C_TAD_INT_RTGDBE) 1620 1621 #define L2C_TAD_INT_DISLMC (L2C_TAD_INT_WRDISLMC | L2C_TAD_INT_RDDISLMC) 1622 1623 #define L2C_TAD_INT_DISOCI (L2C_TAD_INT_WRDISOCI | L2C_TAD_INT_RDDISOCI) 1624 1625 #define L2C_TAD_INT_ENA_ALL (L2C_TAD_INT_ECC | L2C_TAD_INT_TAG | \ 1626 L2C_TAD_INT_RTG | \ 1627 L2C_TAD_INT_DISLMC | L2C_TAD_INT_DISOCI | \ 1628 L2C_TAD_INT_LFBTO) 1629 1630 #define L2C_TAD_TIMETWO 0x50000 1631 #define L2C_TAD_TIMEOUT 0x50100 1632 #define L2C_TAD_ERR 0x60000 1633 #define L2C_TAD_TQD_ERR 0x60100 1634 #define L2C_TAD_TTG_ERR 0x60200 1635 1636 1637 #define L2C_CBC_INT_W1C 0x60000 1638 1639 #define L2C_CBC_INT_RSDSBE BIT(0) 1640 #define L2C_CBC_INT_RSDDBE BIT(1) 1641 1642 #define L2C_CBC_INT_RSD (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_RSDDBE) 1643 1644 #define L2C_CBC_INT_MIBSBE BIT(4) 1645 #define L2C_CBC_INT_MIBDBE BIT(5) 1646 1647 #define L2C_CBC_INT_MIB (L2C_CBC_INT_MIBSBE | L2C_CBC_INT_MIBDBE) 1648 1649 #define L2C_CBC_INT_IORDDISOCI BIT(6) 1650 #define L2C_CBC_INT_IOWRDISOCI BIT(7) 1651 1652 #define L2C_CBC_INT_IODISOCI (L2C_CBC_INT_IORDDISOCI | \ 1653 L2C_CBC_INT_IOWRDISOCI) 1654 1655 #define L2C_CBC_INT_CE (L2C_CBC_INT_RSDSBE | L2C_CBC_INT_MIBSBE) 1656 #define L2C_CBC_INT_UE (L2C_CBC_INT_RSDDBE | L2C_CBC_INT_MIBDBE) 1657 1658 1659 static const struct error_descr l2_cbc_errors[] = { 1660 { 1661 .type = ERR_CORRECTED, 1662 .mask = L2C_CBC_INT_RSDSBE, 1663 .descr = "RSD single-bit error", 1664 }, 1665 { 1666 .type = ERR_CORRECTED, 1667 .mask = L2C_CBC_INT_MIBSBE, 1668 .descr = "MIB single-bit error", 1669 }, 1670 { 1671 .type = ERR_UNCORRECTED, 1672 .mask = L2C_CBC_INT_RSDDBE, 1673 .descr = "RSD double-bit error", 1674 }, 1675 { 1676 .type = ERR_UNCORRECTED, 1677 .mask = L2C_CBC_INT_MIBDBE, 1678 .descr = "MIB double-bit error", 1679 }, 1680 { 1681 .type = ERR_UNCORRECTED, 1682 .mask = L2C_CBC_INT_IORDDISOCI, 1683 .descr = "Read from a disabled CCPI", 1684 }, 1685 { 1686 .type = ERR_UNCORRECTED, 1687 .mask = L2C_CBC_INT_IOWRDISOCI, 1688 .descr = "Write to a disabled CCPI", 1689 }, 1690 {0, 0, NULL}, 1691 }; 1692 1693 #define L2C_CBC_INT_W1S 0x60008 1694 #define L2C_CBC_INT_ENA_W1C 0x60020 1695 1696 #define L2C_CBC_INT_ENA_ALL (L2C_CBC_INT_RSD | L2C_CBC_INT_MIB | \ 1697 L2C_CBC_INT_IODISOCI) 1698 1699 #define L2C_CBC_INT_ENA_W1S 0x60028 1700 1701 #define L2C_CBC_IODISOCIERR 0x80008 1702 #define L2C_CBC_IOCERR 0x80010 1703 #define L2C_CBC_RSDERR 0x80018 1704 #define L2C_CBC_MIBERR 0x80020 1705 1706 1707 #define L2C_MCI_INT_W1C 0x0 1708 1709 #define L2C_MCI_INT_VBFSBE BIT(0) 1710 #define L2C_MCI_INT_VBFDBE BIT(1) 1711 1712 static const struct error_descr l2_mci_errors[] = { 1713 { 1714 .type = ERR_CORRECTED, 1715 .mask = L2C_MCI_INT_VBFSBE, 1716 .descr = "VBF single-bit error", 1717 }, 1718 { 1719 .type = ERR_UNCORRECTED, 1720 .mask = L2C_MCI_INT_VBFDBE, 1721 .descr = "VBF double-bit error", 1722 }, 1723 {0, 0, NULL}, 1724 }; 1725 1726 #define L2C_MCI_INT_W1S 0x8 1727 #define L2C_MCI_INT_ENA_W1C 0x20 1728 1729 #define L2C_MCI_INT_ENA_ALL (L2C_MCI_INT_VBFSBE | L2C_MCI_INT_VBFDBE) 1730 1731 #define L2C_MCI_INT_ENA_W1S 0x28 1732 1733 #define L2C_MCI_ERR 0x10000 1734 1735 #define L2C_MESSAGE_SIZE SZ_1K 1736 #define L2C_OTHER_SIZE (50 * ARRAY_SIZE(l2_tad_errors)) 1737 1738 struct l2c_err_ctx { 1739 char *reg_ext_name; 1740 u64 reg_int; 1741 u64 reg_ext; 1742 }; 1743 1744 struct thunderx_l2c { 1745 void __iomem *regs; 1746 struct pci_dev *pdev; 1747 struct edac_device_ctl_info *edac_dev; 1748 1749 struct dentry *debugfs; 1750 1751 int index; 1752 1753 struct msix_entry msix_ent; 1754 1755 struct l2c_err_ctx err_ctx[RING_ENTRIES]; 1756 unsigned long ring_head; 1757 unsigned long ring_tail; 1758 }; 1759 1760 static irqreturn_t thunderx_l2c_tad_isr(int irq, void *irq_id) 1761 { 1762 struct msix_entry *msix = irq_id; 1763 struct thunderx_l2c *tad = container_of(msix, struct thunderx_l2c, 1764 msix_ent); 1765 1766 unsigned long head = ring_pos(tad->ring_head, ARRAY_SIZE(tad->err_ctx)); 1767 struct l2c_err_ctx *ctx = &tad->err_ctx[head]; 1768 1769 ctx->reg_int = readq(tad->regs + L2C_TAD_INT_W1C); 1770 1771 if (ctx->reg_int & L2C_TAD_INT_ECC) { 1772 ctx->reg_ext_name = "TQD_ERR"; 1773 ctx->reg_ext = readq(tad->regs + L2C_TAD_TQD_ERR); 1774 } else if (ctx->reg_int & L2C_TAD_INT_TAG) { 1775 ctx->reg_ext_name = "TTG_ERR"; 1776 ctx->reg_ext = readq(tad->regs + L2C_TAD_TTG_ERR); 1777 } else if (ctx->reg_int & L2C_TAD_INT_LFBTO) { 1778 ctx->reg_ext_name = "TIMEOUT"; 1779 ctx->reg_ext = readq(tad->regs + L2C_TAD_TIMEOUT); 1780 } else if (ctx->reg_int & L2C_TAD_INT_DISOCI) { 1781 ctx->reg_ext_name = "ERR"; 1782 ctx->reg_ext = readq(tad->regs + L2C_TAD_ERR); 1783 } 1784 1785 writeq(ctx->reg_int, tad->regs + L2C_TAD_INT_W1C); 1786 1787 tad->ring_head++; 1788 1789 return IRQ_WAKE_THREAD; 1790 } 1791 1792 static irqreturn_t thunderx_l2c_cbc_isr(int irq, void *irq_id) 1793 { 1794 struct msix_entry *msix = irq_id; 1795 struct thunderx_l2c *cbc = container_of(msix, struct thunderx_l2c, 1796 msix_ent); 1797 1798 unsigned long head = ring_pos(cbc->ring_head, ARRAY_SIZE(cbc->err_ctx)); 1799 struct l2c_err_ctx *ctx = &cbc->err_ctx[head]; 1800 1801 ctx->reg_int = readq(cbc->regs + L2C_CBC_INT_W1C); 1802 1803 if (ctx->reg_int & L2C_CBC_INT_RSD) { 1804 ctx->reg_ext_name = "RSDERR"; 1805 ctx->reg_ext = readq(cbc->regs + L2C_CBC_RSDERR); 1806 } else if (ctx->reg_int & L2C_CBC_INT_MIB) { 1807 ctx->reg_ext_name = "MIBERR"; 1808 ctx->reg_ext = readq(cbc->regs + L2C_CBC_MIBERR); 1809 } else if (ctx->reg_int & L2C_CBC_INT_IODISOCI) { 1810 ctx->reg_ext_name = "IODISOCIERR"; 1811 ctx->reg_ext = readq(cbc->regs + L2C_CBC_IODISOCIERR); 1812 } 1813 1814 writeq(ctx->reg_int, cbc->regs + L2C_CBC_INT_W1C); 1815 1816 cbc->ring_head++; 1817 1818 return IRQ_WAKE_THREAD; 1819 } 1820 1821 static irqreturn_t thunderx_l2c_mci_isr(int irq, void *irq_id) 1822 { 1823 struct msix_entry *msix = irq_id; 1824 struct thunderx_l2c *mci = container_of(msix, struct thunderx_l2c, 1825 msix_ent); 1826 1827 unsigned long head = ring_pos(mci->ring_head, ARRAY_SIZE(mci->err_ctx)); 1828 struct l2c_err_ctx *ctx = &mci->err_ctx[head]; 1829 1830 ctx->reg_int = readq(mci->regs + L2C_MCI_INT_W1C); 1831 ctx->reg_ext = readq(mci->regs + L2C_MCI_ERR); 1832 1833 writeq(ctx->reg_int, mci->regs + L2C_MCI_INT_W1C); 1834 1835 ctx->reg_ext_name = "ERR"; 1836 1837 mci->ring_head++; 1838 1839 return IRQ_WAKE_THREAD; 1840 } 1841 1842 static irqreturn_t thunderx_l2c_threaded_isr(int irq, void *irq_id) 1843 { 1844 struct msix_entry *msix = irq_id; 1845 struct thunderx_l2c *l2c = container_of(msix, struct thunderx_l2c, 1846 msix_ent); 1847 1848 unsigned long tail = ring_pos(l2c->ring_tail, ARRAY_SIZE(l2c->err_ctx)); 1849 struct l2c_err_ctx *ctx = &l2c->err_ctx[tail]; 1850 irqreturn_t ret = IRQ_NONE; 1851 1852 u64 mask_ue, mask_ce; 1853 const struct error_descr *l2_errors; 1854 char *reg_int_name; 1855 1856 char *msg; 1857 char *other; 1858 1859 msg = kmalloc(OCX_MESSAGE_SIZE, GFP_KERNEL); 1860 other = kmalloc(OCX_OTHER_SIZE, GFP_KERNEL); 1861 1862 if (!msg || !other) 1863 goto err_free; 1864 1865 switch (l2c->pdev->device) { 1866 case PCI_DEVICE_ID_THUNDER_L2C_TAD: 1867 reg_int_name = "L2C_TAD_INT"; 1868 mask_ue = L2C_TAD_INT_UE; 1869 mask_ce = L2C_TAD_INT_CE; 1870 l2_errors = l2_tad_errors; 1871 break; 1872 case PCI_DEVICE_ID_THUNDER_L2C_CBC: 1873 reg_int_name = "L2C_CBC_INT"; 1874 mask_ue = L2C_CBC_INT_UE; 1875 mask_ce = L2C_CBC_INT_CE; 1876 l2_errors = l2_cbc_errors; 1877 break; 1878 case PCI_DEVICE_ID_THUNDER_L2C_MCI: 1879 reg_int_name = "L2C_MCI_INT"; 1880 mask_ue = L2C_MCI_INT_VBFDBE; 1881 mask_ce = L2C_MCI_INT_VBFSBE; 1882 l2_errors = l2_mci_errors; 1883 break; 1884 default: 1885 dev_err(&l2c->pdev->dev, "Unsupported device: %04x\n", 1886 l2c->pdev->device); 1887 goto err_free; 1888 } 1889 1890 while (CIRC_CNT(l2c->ring_head, l2c->ring_tail, 1891 ARRAY_SIZE(l2c->err_ctx))) { 1892 snprintf(msg, L2C_MESSAGE_SIZE, 1893 "%s: %s: %016llx, %s: %016llx", 1894 l2c->edac_dev->ctl_name, reg_int_name, ctx->reg_int, 1895 ctx->reg_ext_name, ctx->reg_ext); 1896 1897 decode_register(other, L2C_OTHER_SIZE, l2_errors, ctx->reg_int); 1898 1899 strlcat(msg, other, L2C_MESSAGE_SIZE); 1900 1901 if (ctx->reg_int & mask_ue) 1902 edac_device_handle_ue(l2c->edac_dev, 0, 0, msg); 1903 else if (ctx->reg_int & mask_ce) 1904 edac_device_handle_ce(l2c->edac_dev, 0, 0, msg); 1905 1906 l2c->ring_tail++; 1907 } 1908 1909 ret = IRQ_HANDLED; 1910 1911 err_free: 1912 kfree(other); 1913 kfree(msg); 1914 1915 return ret; 1916 } 1917 1918 #define L2C_DEBUGFS_ATTR(_name, _reg) DEBUGFS_REG_ATTR(l2c, _name, _reg) 1919 1920 L2C_DEBUGFS_ATTR(tad_int, L2C_TAD_INT_W1S); 1921 1922 static struct debugfs_entry *l2c_tad_dfs_ents[] = { 1923 &debugfs_tad_int, 1924 }; 1925 1926 L2C_DEBUGFS_ATTR(cbc_int, L2C_CBC_INT_W1S); 1927 1928 static struct debugfs_entry *l2c_cbc_dfs_ents[] = { 1929 &debugfs_cbc_int, 1930 }; 1931 1932 L2C_DEBUGFS_ATTR(mci_int, L2C_MCI_INT_W1S); 1933 1934 static struct debugfs_entry *l2c_mci_dfs_ents[] = { 1935 &debugfs_mci_int, 1936 }; 1937 1938 static const struct pci_device_id thunderx_l2c_pci_tbl[] = { 1939 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_TAD), }, 1940 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_CBC), }, 1941 { PCI_DEVICE(PCI_VENDOR_ID_CAVIUM, PCI_DEVICE_ID_THUNDER_L2C_MCI), }, 1942 { 0, }, 1943 }; 1944 1945 static int thunderx_l2c_probe(struct pci_dev *pdev, 1946 const struct pci_device_id *id) 1947 { 1948 struct thunderx_l2c *l2c; 1949 struct edac_device_ctl_info *edac_dev; 1950 struct debugfs_entry **l2c_devattr; 1951 size_t dfs_entries; 1952 irqreturn_t (*thunderx_l2c_isr)(int, void *) = NULL; 1953 char name[32]; 1954 const char *fmt; 1955 u64 reg_en_offs, reg_en_mask; 1956 int idx; 1957 int ret; 1958 1959 ret = pcim_enable_device(pdev); 1960 if (ret) { 1961 dev_err(&pdev->dev, "Cannot enable PCI device: %d\n", ret); 1962 return ret; 1963 } 1964 1965 ret = pcim_iomap_regions(pdev, BIT(0), "thunderx_l2c"); 1966 if (ret) { 1967 dev_err(&pdev->dev, "Cannot map PCI resources: %d\n", ret); 1968 return ret; 1969 } 1970 1971 switch (pdev->device) { 1972 case PCI_DEVICE_ID_THUNDER_L2C_TAD: 1973 thunderx_l2c_isr = thunderx_l2c_tad_isr; 1974 l2c_devattr = l2c_tad_dfs_ents; 1975 dfs_entries = ARRAY_SIZE(l2c_tad_dfs_ents); 1976 fmt = "L2C-TAD%d"; 1977 reg_en_offs = L2C_TAD_INT_ENA_W1S; 1978 reg_en_mask = L2C_TAD_INT_ENA_ALL; 1979 break; 1980 case PCI_DEVICE_ID_THUNDER_L2C_CBC: 1981 thunderx_l2c_isr = thunderx_l2c_cbc_isr; 1982 l2c_devattr = l2c_cbc_dfs_ents; 1983 dfs_entries = ARRAY_SIZE(l2c_cbc_dfs_ents); 1984 fmt = "L2C-CBC%d"; 1985 reg_en_offs = L2C_CBC_INT_ENA_W1S; 1986 reg_en_mask = L2C_CBC_INT_ENA_ALL; 1987 break; 1988 case PCI_DEVICE_ID_THUNDER_L2C_MCI: 1989 thunderx_l2c_isr = thunderx_l2c_mci_isr; 1990 l2c_devattr = l2c_mci_dfs_ents; 1991 dfs_entries = ARRAY_SIZE(l2c_mci_dfs_ents); 1992 fmt = "L2C-MCI%d"; 1993 reg_en_offs = L2C_MCI_INT_ENA_W1S; 1994 reg_en_mask = L2C_MCI_INT_ENA_ALL; 1995 break; 1996 default: 1997 //Should never ever get here 1998 dev_err(&pdev->dev, "Unsupported PCI device: %04x\n", 1999 pdev->device); 2000 return -EINVAL; 2001 } 2002 2003 idx = edac_device_alloc_index(); 2004 snprintf(name, sizeof(name), fmt, idx); 2005 2006 edac_dev = edac_device_alloc_ctl_info(sizeof(struct thunderx_l2c), 2007 name, 1, "L2C", 1, 0, 2008 NULL, 0, idx); 2009 if (!edac_dev) { 2010 dev_err(&pdev->dev, "Cannot allocate EDAC device\n"); 2011 return -ENOMEM; 2012 } 2013 2014 l2c = edac_dev->pvt_info; 2015 l2c->edac_dev = edac_dev; 2016 2017 l2c->regs = pcim_iomap_table(pdev)[0]; 2018 if (!l2c->regs) { 2019 dev_err(&pdev->dev, "Cannot map PCI resources\n"); 2020 ret = -ENODEV; 2021 goto err_free; 2022 } 2023 2024 l2c->pdev = pdev; 2025 2026 l2c->ring_head = 0; 2027 l2c->ring_tail = 0; 2028 2029 l2c->msix_ent.entry = 0; 2030 l2c->msix_ent.vector = 0; 2031 2032 ret = pci_enable_msix_exact(pdev, &l2c->msix_ent, 1); 2033 if (ret) { 2034 dev_err(&pdev->dev, "Cannot enable interrupt: %d\n", ret); 2035 goto err_free; 2036 } 2037 2038 ret = devm_request_threaded_irq(&pdev->dev, l2c->msix_ent.vector, 2039 thunderx_l2c_isr, 2040 thunderx_l2c_threaded_isr, 2041 0, "[EDAC] ThunderX L2C", 2042 &l2c->msix_ent); 2043 if (ret) 2044 goto err_free; 2045 2046 edac_dev->dev = &pdev->dev; 2047 edac_dev->dev_name = dev_name(&pdev->dev); 2048 edac_dev->mod_name = "thunderx-l2c"; 2049 edac_dev->ctl_name = "thunderx-l2c"; 2050 2051 ret = edac_device_add_device(edac_dev); 2052 if (ret) { 2053 dev_err(&pdev->dev, "Cannot add EDAC device: %d\n", ret); 2054 goto err_free; 2055 } 2056 2057 if (IS_ENABLED(CONFIG_EDAC_DEBUG)) { 2058 l2c->debugfs = edac_debugfs_create_dir(pdev->dev.kobj.name); 2059 2060 ret = thunderx_create_debugfs_nodes(l2c->debugfs, l2c_devattr, 2061 l2c, dfs_entries); 2062 2063 if (ret != dfs_entries) { 2064 dev_warn(&pdev->dev, "Error creating debugfs entries: %d%s\n", 2065 ret, ret >= 0 ? " created" : ""); 2066 } 2067 } 2068 2069 pci_set_drvdata(pdev, edac_dev); 2070 2071 writeq(reg_en_mask, l2c->regs + reg_en_offs); 2072 2073 return 0; 2074 2075 err_free: 2076 edac_device_free_ctl_info(edac_dev); 2077 2078 return ret; 2079 } 2080 2081 static void thunderx_l2c_remove(struct pci_dev *pdev) 2082 { 2083 struct edac_device_ctl_info *edac_dev = pci_get_drvdata(pdev); 2084 struct thunderx_l2c *l2c = edac_dev->pvt_info; 2085 2086 switch (pdev->device) { 2087 case PCI_DEVICE_ID_THUNDER_L2C_TAD: 2088 writeq(L2C_TAD_INT_ENA_ALL, l2c->regs + L2C_TAD_INT_ENA_W1C); 2089 break; 2090 case PCI_DEVICE_ID_THUNDER_L2C_CBC: 2091 writeq(L2C_CBC_INT_ENA_ALL, l2c->regs + L2C_CBC_INT_ENA_W1C); 2092 break; 2093 case PCI_DEVICE_ID_THUNDER_L2C_MCI: 2094 writeq(L2C_MCI_INT_ENA_ALL, l2c->regs + L2C_MCI_INT_ENA_W1C); 2095 break; 2096 } 2097 2098 edac_debugfs_remove_recursive(l2c->debugfs); 2099 2100 edac_device_del_device(&pdev->dev); 2101 edac_device_free_ctl_info(edac_dev); 2102 } 2103 2104 MODULE_DEVICE_TABLE(pci, thunderx_l2c_pci_tbl); 2105 2106 static struct pci_driver thunderx_l2c_driver = { 2107 .name = "thunderx_l2c_edac", 2108 .probe = thunderx_l2c_probe, 2109 .remove = thunderx_l2c_remove, 2110 .id_table = thunderx_l2c_pci_tbl, 2111 }; 2112 2113 static int __init thunderx_edac_init(void) 2114 { 2115 int rc = 0; 2116 2117 if (ghes_get_devices()) 2118 return -EBUSY; 2119 2120 rc = pci_register_driver(&thunderx_lmc_driver); 2121 if (rc) 2122 return rc; 2123 2124 rc = pci_register_driver(&thunderx_ocx_driver); 2125 if (rc) 2126 goto err_lmc; 2127 2128 rc = pci_register_driver(&thunderx_l2c_driver); 2129 if (rc) 2130 goto err_ocx; 2131 2132 return rc; 2133 err_ocx: 2134 pci_unregister_driver(&thunderx_ocx_driver); 2135 err_lmc: 2136 pci_unregister_driver(&thunderx_lmc_driver); 2137 2138 return rc; 2139 } 2140 2141 static void __exit thunderx_edac_exit(void) 2142 { 2143 pci_unregister_driver(&thunderx_l2c_driver); 2144 pci_unregister_driver(&thunderx_ocx_driver); 2145 pci_unregister_driver(&thunderx_lmc_driver); 2146 2147 } 2148 2149 module_init(thunderx_edac_init); 2150 module_exit(thunderx_edac_exit); 2151 2152 MODULE_LICENSE("GPL v2"); 2153 MODULE_AUTHOR("Cavium, Inc."); 2154 MODULE_DESCRIPTION("EDAC Driver for Cavium ThunderX"); 2155