1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * IBM Accelerator Family 'GenWQE' 4 * 5 * (C) Copyright IBM Corp. 2013 6 * 7 * Author: Frank Haverkamp <haver@linux.vnet.ibm.com> 8 * Author: Joerg-Stephan Vogt <jsvogt@de.ibm.com> 9 * Author: Michael Jung <mijung@gmx.net> 10 * Author: Michael Ruettger <michael@ibmra.de> 11 */ 12 13 /* 14 * Miscelanous functionality used in the other GenWQE driver parts. 15 */ 16 17 #include <linux/kernel.h> 18 #include <linux/sched.h> 19 #include <linux/vmalloc.h> 20 #include <linux/page-flags.h> 21 #include <linux/scatterlist.h> 22 #include <linux/hugetlb.h> 23 #include <linux/iommu.h> 24 #include <linux/pci.h> 25 #include <linux/dma-mapping.h> 26 #include <linux/ctype.h> 27 #include <linux/module.h> 28 #include <linux/platform_device.h> 29 #include <linux/delay.h> 30 #include <linux/pgtable.h> 31 32 #include "genwqe_driver.h" 33 #include "card_base.h" 34 #include "card_ddcb.h" 35 36 /** 37 * __genwqe_writeq() - Write 64-bit register 38 * @cd: genwqe device descriptor 39 * @byte_offs: byte offset within BAR 40 * @val: 64-bit value 41 * 42 * Return: 0 if success; < 0 if error 43 */ 44 int __genwqe_writeq(struct genwqe_dev *cd, u64 byte_offs, u64 val) 45 { 46 struct pci_dev *pci_dev = cd->pci_dev; 47 48 if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 49 return -EIO; 50 51 if (cd->mmio == NULL) 52 return -EIO; 53 54 if (pci_channel_offline(pci_dev)) 55 return -EIO; 56 57 __raw_writeq((__force u64)cpu_to_be64(val), cd->mmio + byte_offs); 58 return 0; 59 } 60 61 /** 62 * __genwqe_readq() - Read 64-bit register 63 * @cd: genwqe device descriptor 64 * @byte_offs: offset within BAR 65 * 66 * Return: value from register 67 */ 68 u64 __genwqe_readq(struct genwqe_dev *cd, u64 byte_offs) 69 { 70 if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 71 return 0xffffffffffffffffull; 72 73 if ((cd->err_inject & GENWQE_INJECT_GFIR_FATAL) && 74 (byte_offs == IO_SLC_CFGREG_GFIR)) 75 return 0x000000000000ffffull; 76 77 if ((cd->err_inject & GENWQE_INJECT_GFIR_INFO) && 78 (byte_offs == IO_SLC_CFGREG_GFIR)) 79 return 0x00000000ffff0000ull; 80 81 if (cd->mmio == NULL) 82 return 0xffffffffffffffffull; 83 84 return be64_to_cpu((__force __be64)__raw_readq(cd->mmio + byte_offs)); 85 } 86 87 /** 88 * __genwqe_writel() - Write 32-bit register 89 * @cd: genwqe device descriptor 90 * @byte_offs: byte offset within BAR 91 * @val: 32-bit value 92 * 93 * Return: 0 if success; < 0 if error 94 */ 95 int __genwqe_writel(struct genwqe_dev *cd, u64 byte_offs, u32 val) 96 { 97 struct pci_dev *pci_dev = cd->pci_dev; 98 99 if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 100 return -EIO; 101 102 if (cd->mmio == NULL) 103 return -EIO; 104 105 if (pci_channel_offline(pci_dev)) 106 return -EIO; 107 108 __raw_writel((__force u32)cpu_to_be32(val), cd->mmio + byte_offs); 109 return 0; 110 } 111 112 /** 113 * __genwqe_readl() - Read 32-bit register 114 * @cd: genwqe device descriptor 115 * @byte_offs: offset within BAR 116 * 117 * Return: Value from register 118 */ 119 u32 __genwqe_readl(struct genwqe_dev *cd, u64 byte_offs) 120 { 121 if (cd->err_inject & GENWQE_INJECT_HARDWARE_FAILURE) 122 return 0xffffffff; 123 124 if (cd->mmio == NULL) 125 return 0xffffffff; 126 127 return be32_to_cpu((__force __be32)__raw_readl(cd->mmio + byte_offs)); 128 } 129 130 /** 131 * genwqe_read_app_id() - Extract app_id 132 * @cd: genwqe device descriptor 133 * @app_name: carrier used to pass-back name 134 * @len: length of data for name 135 * 136 * app_unitcfg need to be filled with valid data first 137 */ 138 int genwqe_read_app_id(struct genwqe_dev *cd, char *app_name, int len) 139 { 140 int i, j; 141 u32 app_id = (u32)cd->app_unitcfg; 142 143 memset(app_name, 0, len); 144 for (i = 0, j = 0; j < min(len, 4); j++) { 145 char ch = (char)((app_id >> (24 - j*8)) & 0xff); 146 147 if (ch == ' ') 148 continue; 149 app_name[i++] = isprint(ch) ? ch : 'X'; 150 } 151 return i; 152 } 153 154 /** 155 * genwqe_init_crc32() - Prepare a lookup table for fast crc32 calculations 156 * 157 * Existing kernel functions seem to use a different polynom, 158 * therefore we could not use them here. 159 * 160 * Genwqe's Polynomial = 0x20044009 161 */ 162 #define CRC32_POLYNOMIAL 0x20044009 163 static u32 crc32_tab[256]; /* crc32 lookup table */ 164 165 void genwqe_init_crc32(void) 166 { 167 int i, j; 168 u32 crc; 169 170 for (i = 0; i < 256; i++) { 171 crc = i << 24; 172 for (j = 0; j < 8; j++) { 173 if (crc & 0x80000000) 174 crc = (crc << 1) ^ CRC32_POLYNOMIAL; 175 else 176 crc = (crc << 1); 177 } 178 crc32_tab[i] = crc; 179 } 180 } 181 182 /** 183 * genwqe_crc32() - Generate 32-bit crc as required for DDCBs 184 * @buff: pointer to data buffer 185 * @len: length of data for calculation 186 * @init: initial crc (0xffffffff at start) 187 * 188 * polynomial = x^32 * + x^29 + x^18 + x^14 + x^3 + 1 (0x20044009) 189 * 190 * Example: 4 bytes 0x01 0x02 0x03 0x04 with init=0xffffffff should 191 * result in a crc32 of 0xf33cb7d3. 192 * 193 * The existing kernel crc functions did not cover this polynom yet. 194 * 195 * Return: crc32 checksum. 196 */ 197 u32 genwqe_crc32(u8 *buff, size_t len, u32 init) 198 { 199 int i; 200 u32 crc; 201 202 crc = init; 203 while (len--) { 204 i = ((crc >> 24) ^ *buff++) & 0xFF; 205 crc = (crc << 8) ^ crc32_tab[i]; 206 } 207 return crc; 208 } 209 210 void *__genwqe_alloc_consistent(struct genwqe_dev *cd, size_t size, 211 dma_addr_t *dma_handle) 212 { 213 if (get_order(size) >= MAX_ORDER) 214 return NULL; 215 216 return dma_alloc_coherent(&cd->pci_dev->dev, size, dma_handle, 217 GFP_KERNEL); 218 } 219 220 void __genwqe_free_consistent(struct genwqe_dev *cd, size_t size, 221 void *vaddr, dma_addr_t dma_handle) 222 { 223 if (vaddr == NULL) 224 return; 225 226 dma_free_coherent(&cd->pci_dev->dev, size, vaddr, dma_handle); 227 } 228 229 static void genwqe_unmap_pages(struct genwqe_dev *cd, dma_addr_t *dma_list, 230 int num_pages) 231 { 232 int i; 233 struct pci_dev *pci_dev = cd->pci_dev; 234 235 for (i = 0; (i < num_pages) && (dma_list[i] != 0x0); i++) { 236 dma_unmap_page(&pci_dev->dev, dma_list[i], PAGE_SIZE, 237 DMA_BIDIRECTIONAL); 238 dma_list[i] = 0x0; 239 } 240 } 241 242 static int genwqe_map_pages(struct genwqe_dev *cd, 243 struct page **page_list, int num_pages, 244 dma_addr_t *dma_list) 245 { 246 int i; 247 struct pci_dev *pci_dev = cd->pci_dev; 248 249 /* establish DMA mapping for requested pages */ 250 for (i = 0; i < num_pages; i++) { 251 dma_addr_t daddr; 252 253 dma_list[i] = 0x0; 254 daddr = dma_map_page(&pci_dev->dev, page_list[i], 255 0, /* map_offs */ 256 PAGE_SIZE, 257 DMA_BIDIRECTIONAL); /* FIXME rd/rw */ 258 259 if (dma_mapping_error(&pci_dev->dev, daddr)) { 260 dev_err(&pci_dev->dev, 261 "[%s] err: no dma addr daddr=%016llx!\n", 262 __func__, (long long)daddr); 263 goto err; 264 } 265 266 dma_list[i] = daddr; 267 } 268 return 0; 269 270 err: 271 genwqe_unmap_pages(cd, dma_list, num_pages); 272 return -EIO; 273 } 274 275 static int genwqe_sgl_size(int num_pages) 276 { 277 int len, num_tlb = num_pages / 7; 278 279 len = sizeof(struct sg_entry) * (num_pages+num_tlb + 1); 280 return roundup(len, PAGE_SIZE); 281 } 282 283 /* 284 * genwqe_alloc_sync_sgl() - Allocate memory for sgl and overlapping pages 285 * 286 * Allocates memory for sgl and overlapping pages. Pages which might 287 * overlap other user-space memory blocks are being cached for DMAs, 288 * such that we do not run into syncronization issues. Data is copied 289 * from user-space into the cached pages. 290 */ 291 int genwqe_alloc_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, 292 void __user *user_addr, size_t user_size, int write) 293 { 294 int ret = -ENOMEM; 295 struct pci_dev *pci_dev = cd->pci_dev; 296 297 sgl->fpage_offs = offset_in_page((unsigned long)user_addr); 298 sgl->fpage_size = min_t(size_t, PAGE_SIZE-sgl->fpage_offs, user_size); 299 sgl->nr_pages = DIV_ROUND_UP(sgl->fpage_offs + user_size, PAGE_SIZE); 300 sgl->lpage_size = (user_size - sgl->fpage_size) % PAGE_SIZE; 301 302 dev_dbg(&pci_dev->dev, "[%s] uaddr=%p usize=%8ld nr_pages=%ld fpage_offs=%lx fpage_size=%ld lpage_size=%ld\n", 303 __func__, user_addr, user_size, sgl->nr_pages, 304 sgl->fpage_offs, sgl->fpage_size, sgl->lpage_size); 305 306 sgl->user_addr = user_addr; 307 sgl->user_size = user_size; 308 sgl->write = write; 309 sgl->sgl_size = genwqe_sgl_size(sgl->nr_pages); 310 311 if (get_order(sgl->sgl_size) > MAX_ORDER) { 312 dev_err(&pci_dev->dev, 313 "[%s] err: too much memory requested!\n", __func__); 314 return ret; 315 } 316 317 sgl->sgl = __genwqe_alloc_consistent(cd, sgl->sgl_size, 318 &sgl->sgl_dma_addr); 319 if (sgl->sgl == NULL) { 320 dev_err(&pci_dev->dev, 321 "[%s] err: no memory available!\n", __func__); 322 return ret; 323 } 324 325 /* Only use buffering on incomplete pages */ 326 if ((sgl->fpage_size != 0) && (sgl->fpage_size != PAGE_SIZE)) { 327 sgl->fpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, 328 &sgl->fpage_dma_addr); 329 if (sgl->fpage == NULL) 330 goto err_out; 331 332 /* Sync with user memory */ 333 if (copy_from_user(sgl->fpage + sgl->fpage_offs, 334 user_addr, sgl->fpage_size)) { 335 ret = -EFAULT; 336 goto err_out; 337 } 338 } 339 if (sgl->lpage_size != 0) { 340 sgl->lpage = __genwqe_alloc_consistent(cd, PAGE_SIZE, 341 &sgl->lpage_dma_addr); 342 if (sgl->lpage == NULL) 343 goto err_out1; 344 345 /* Sync with user memory */ 346 if (copy_from_user(sgl->lpage, user_addr + user_size - 347 sgl->lpage_size, sgl->lpage_size)) { 348 ret = -EFAULT; 349 goto err_out2; 350 } 351 } 352 return 0; 353 354 err_out2: 355 __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, 356 sgl->lpage_dma_addr); 357 sgl->lpage = NULL; 358 sgl->lpage_dma_addr = 0; 359 err_out1: 360 __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, 361 sgl->fpage_dma_addr); 362 sgl->fpage = NULL; 363 sgl->fpage_dma_addr = 0; 364 err_out: 365 __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, 366 sgl->sgl_dma_addr); 367 sgl->sgl = NULL; 368 sgl->sgl_dma_addr = 0; 369 sgl->sgl_size = 0; 370 371 return ret; 372 } 373 374 int genwqe_setup_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl, 375 dma_addr_t *dma_list) 376 { 377 int i = 0, j = 0, p; 378 unsigned long dma_offs, map_offs; 379 dma_addr_t prev_daddr = 0; 380 struct sg_entry *s, *last_s = NULL; 381 size_t size = sgl->user_size; 382 383 dma_offs = 128; /* next block if needed/dma_offset */ 384 map_offs = sgl->fpage_offs; /* offset in first page */ 385 386 s = &sgl->sgl[0]; /* first set of 8 entries */ 387 p = 0; /* page */ 388 while (p < sgl->nr_pages) { 389 dma_addr_t daddr; 390 unsigned int size_to_map; 391 392 /* always write the chaining entry, cleanup is done later */ 393 j = 0; 394 s[j].target_addr = cpu_to_be64(sgl->sgl_dma_addr + dma_offs); 395 s[j].len = cpu_to_be32(128); 396 s[j].flags = cpu_to_be32(SG_CHAINED); 397 j++; 398 399 while (j < 8) { 400 /* DMA mapping for requested page, offs, size */ 401 size_to_map = min(size, PAGE_SIZE - map_offs); 402 403 if ((p == 0) && (sgl->fpage != NULL)) { 404 daddr = sgl->fpage_dma_addr + map_offs; 405 406 } else if ((p == sgl->nr_pages - 1) && 407 (sgl->lpage != NULL)) { 408 daddr = sgl->lpage_dma_addr; 409 } else { 410 daddr = dma_list[p] + map_offs; 411 } 412 413 size -= size_to_map; 414 map_offs = 0; 415 416 if (prev_daddr == daddr) { 417 u32 prev_len = be32_to_cpu(last_s->len); 418 419 /* pr_info("daddr combining: " 420 "%016llx/%08x -> %016llx\n", 421 prev_daddr, prev_len, daddr); */ 422 423 last_s->len = cpu_to_be32(prev_len + 424 size_to_map); 425 426 p++; /* process next page */ 427 if (p == sgl->nr_pages) 428 goto fixup; /* nothing to do */ 429 430 prev_daddr = daddr + size_to_map; 431 continue; 432 } 433 434 /* start new entry */ 435 s[j].target_addr = cpu_to_be64(daddr); 436 s[j].len = cpu_to_be32(size_to_map); 437 s[j].flags = cpu_to_be32(SG_DATA); 438 prev_daddr = daddr + size_to_map; 439 last_s = &s[j]; 440 j++; 441 442 p++; /* process next page */ 443 if (p == sgl->nr_pages) 444 goto fixup; /* nothing to do */ 445 } 446 dma_offs += 128; 447 s += 8; /* continue 8 elements further */ 448 } 449 fixup: 450 if (j == 1) { /* combining happened on last entry! */ 451 s -= 8; /* full shift needed on previous sgl block */ 452 j = 7; /* shift all elements */ 453 } 454 455 for (i = 0; i < j; i++) /* move elements 1 up */ 456 s[i] = s[i + 1]; 457 458 s[i].target_addr = cpu_to_be64(0); 459 s[i].len = cpu_to_be32(0); 460 s[i].flags = cpu_to_be32(SG_END_LIST); 461 return 0; 462 } 463 464 /** 465 * genwqe_free_sync_sgl() - Free memory for sgl and overlapping pages 466 * @cd: genwqe device descriptor 467 * @sgl: scatter gather list describing user-space memory 468 * 469 * After the DMA transfer has been completed we free the memory for 470 * the sgl and the cached pages. Data is being transferred from cached 471 * pages into user-space buffers. 472 */ 473 int genwqe_free_sync_sgl(struct genwqe_dev *cd, struct genwqe_sgl *sgl) 474 { 475 int rc = 0; 476 size_t offset; 477 unsigned long res; 478 struct pci_dev *pci_dev = cd->pci_dev; 479 480 if (sgl->fpage) { 481 if (sgl->write) { 482 res = copy_to_user(sgl->user_addr, 483 sgl->fpage + sgl->fpage_offs, sgl->fpage_size); 484 if (res) { 485 dev_err(&pci_dev->dev, 486 "[%s] err: copying fpage! (res=%lu)\n", 487 __func__, res); 488 rc = -EFAULT; 489 } 490 } 491 __genwqe_free_consistent(cd, PAGE_SIZE, sgl->fpage, 492 sgl->fpage_dma_addr); 493 sgl->fpage = NULL; 494 sgl->fpage_dma_addr = 0; 495 } 496 if (sgl->lpage) { 497 if (sgl->write) { 498 offset = sgl->user_size - sgl->lpage_size; 499 res = copy_to_user(sgl->user_addr + offset, sgl->lpage, 500 sgl->lpage_size); 501 if (res) { 502 dev_err(&pci_dev->dev, 503 "[%s] err: copying lpage! (res=%lu)\n", 504 __func__, res); 505 rc = -EFAULT; 506 } 507 } 508 __genwqe_free_consistent(cd, PAGE_SIZE, sgl->lpage, 509 sgl->lpage_dma_addr); 510 sgl->lpage = NULL; 511 sgl->lpage_dma_addr = 0; 512 } 513 __genwqe_free_consistent(cd, sgl->sgl_size, sgl->sgl, 514 sgl->sgl_dma_addr); 515 516 sgl->sgl = NULL; 517 sgl->sgl_dma_addr = 0x0; 518 sgl->sgl_size = 0; 519 return rc; 520 } 521 522 /** 523 * genwqe_user_vmap() - Map user-space memory to virtual kernel memory 524 * @cd: pointer to genwqe device 525 * @m: mapping params 526 * @uaddr: user virtual address 527 * @size: size of memory to be mapped 528 * 529 * We need to think about how we could speed this up. Of course it is 530 * not a good idea to do this over and over again, like we are 531 * currently doing it. Nevertheless, I am curious where on the path 532 * the performance is spend. Most probably within the memory 533 * allocation functions, but maybe also in the DMA mapping code. 534 * 535 * Restrictions: The maximum size of the possible mapping currently depends 536 * on the amount of memory we can get using kzalloc() for the 537 * page_list and pci_alloc_consistent for the sg_list. 538 * The sg_list is currently itself not scattered, which could 539 * be fixed with some effort. The page_list must be split into 540 * PAGE_SIZE chunks too. All that will make the complicated 541 * code more complicated. 542 * 543 * Return: 0 if success 544 */ 545 int genwqe_user_vmap(struct genwqe_dev *cd, struct dma_mapping *m, void *uaddr, 546 unsigned long size) 547 { 548 int rc = -EINVAL; 549 unsigned long data, offs; 550 struct pci_dev *pci_dev = cd->pci_dev; 551 552 if ((uaddr == NULL) || (size == 0)) { 553 m->size = 0; /* mark unused and not added */ 554 return -EINVAL; 555 } 556 m->u_vaddr = uaddr; 557 m->size = size; 558 559 /* determine space needed for page_list. */ 560 data = (unsigned long)uaddr; 561 offs = offset_in_page(data); 562 if (size > ULONG_MAX - PAGE_SIZE - offs) { 563 m->size = 0; /* mark unused and not added */ 564 return -EINVAL; 565 } 566 m->nr_pages = DIV_ROUND_UP(offs + size, PAGE_SIZE); 567 568 m->page_list = kcalloc(m->nr_pages, 569 sizeof(struct page *) + sizeof(dma_addr_t), 570 GFP_KERNEL); 571 if (!m->page_list) { 572 dev_err(&pci_dev->dev, "err: alloc page_list failed\n"); 573 m->nr_pages = 0; 574 m->u_vaddr = NULL; 575 m->size = 0; /* mark unused and not added */ 576 return -ENOMEM; 577 } 578 m->dma_list = (dma_addr_t *)(m->page_list + m->nr_pages); 579 580 /* pin user pages in memory */ 581 rc = pin_user_pages_fast(data & PAGE_MASK, /* page aligned addr */ 582 m->nr_pages, 583 m->write ? FOLL_WRITE : 0, /* readable/writable */ 584 m->page_list); /* ptrs to pages */ 585 if (rc < 0) 586 goto fail_pin_user_pages; 587 588 /* assumption: pin_user_pages can be killed by signals. */ 589 if (rc < m->nr_pages) { 590 unpin_user_pages_dirty_lock(m->page_list, rc, m->write); 591 rc = -EFAULT; 592 goto fail_pin_user_pages; 593 } 594 595 rc = genwqe_map_pages(cd, m->page_list, m->nr_pages, m->dma_list); 596 if (rc != 0) 597 goto fail_free_user_pages; 598 599 return 0; 600 601 fail_free_user_pages: 602 unpin_user_pages_dirty_lock(m->page_list, m->nr_pages, m->write); 603 604 fail_pin_user_pages: 605 kfree(m->page_list); 606 m->page_list = NULL; 607 m->dma_list = NULL; 608 m->nr_pages = 0; 609 m->u_vaddr = NULL; 610 m->size = 0; /* mark unused and not added */ 611 return rc; 612 } 613 614 /** 615 * genwqe_user_vunmap() - Undo mapping of user-space mem to virtual kernel 616 * memory 617 * @cd: pointer to genwqe device 618 * @m: mapping params 619 */ 620 int genwqe_user_vunmap(struct genwqe_dev *cd, struct dma_mapping *m) 621 { 622 struct pci_dev *pci_dev = cd->pci_dev; 623 624 if (!dma_mapping_used(m)) { 625 dev_err(&pci_dev->dev, "[%s] err: mapping %p not used!\n", 626 __func__, m); 627 return -EINVAL; 628 } 629 630 if (m->dma_list) 631 genwqe_unmap_pages(cd, m->dma_list, m->nr_pages); 632 633 if (m->page_list) { 634 unpin_user_pages_dirty_lock(m->page_list, m->nr_pages, 635 m->write); 636 kfree(m->page_list); 637 m->page_list = NULL; 638 m->dma_list = NULL; 639 m->nr_pages = 0; 640 } 641 642 m->u_vaddr = NULL; 643 m->size = 0; /* mark as unused and not added */ 644 return 0; 645 } 646 647 /** 648 * genwqe_card_type() - Get chip type SLU Configuration Register 649 * @cd: pointer to the genwqe device descriptor 650 * Return: 0: Altera Stratix-IV 230 651 * 1: Altera Stratix-IV 530 652 * 2: Altera Stratix-V A4 653 * 3: Altera Stratix-V A7 654 */ 655 u8 genwqe_card_type(struct genwqe_dev *cd) 656 { 657 u64 card_type = cd->slu_unitcfg; 658 659 return (u8)((card_type & IO_SLU_UNITCFG_TYPE_MASK) >> 20); 660 } 661 662 /** 663 * genwqe_card_reset() - Reset the card 664 * @cd: pointer to the genwqe device descriptor 665 */ 666 int genwqe_card_reset(struct genwqe_dev *cd) 667 { 668 u64 softrst; 669 struct pci_dev *pci_dev = cd->pci_dev; 670 671 if (!genwqe_is_privileged(cd)) 672 return -ENODEV; 673 674 /* new SL */ 675 __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, 0x1ull); 676 msleep(1000); 677 __genwqe_readq(cd, IO_HSU_FIR_CLR); 678 __genwqe_readq(cd, IO_APP_FIR_CLR); 679 __genwqe_readq(cd, IO_SLU_FIR_CLR); 680 681 /* 682 * Read-modify-write to preserve the stealth bits 683 * 684 * For SL >= 039, Stealth WE bit allows removing 685 * the read-modify-wrote. 686 * r-m-w may require a mask 0x3C to avoid hitting hard 687 * reset again for error reset (should be 0, chicken). 688 */ 689 softrst = __genwqe_readq(cd, IO_SLC_CFGREG_SOFTRESET) & 0x3cull; 690 __genwqe_writeq(cd, IO_SLC_CFGREG_SOFTRESET, softrst | 0x2ull); 691 692 /* give ERRORRESET some time to finish */ 693 msleep(50); 694 695 if (genwqe_need_err_masking(cd)) { 696 dev_info(&pci_dev->dev, 697 "[%s] masking errors for old bitstreams\n", __func__); 698 __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); 699 } 700 return 0; 701 } 702 703 int genwqe_read_softreset(struct genwqe_dev *cd) 704 { 705 u64 bitstream; 706 707 if (!genwqe_is_privileged(cd)) 708 return -ENODEV; 709 710 bitstream = __genwqe_readq(cd, IO_SLU_BITSTREAM) & 0x1; 711 cd->softreset = (bitstream == 0) ? 0x8ull : 0xcull; 712 return 0; 713 } 714 715 /** 716 * genwqe_set_interrupt_capability() - Configure MSI capability structure 717 * @cd: pointer to the device 718 * @count: number of vectors to allocate 719 * Return: 0 if no error 720 */ 721 int genwqe_set_interrupt_capability(struct genwqe_dev *cd, int count) 722 { 723 int rc; 724 725 rc = pci_alloc_irq_vectors(cd->pci_dev, 1, count, PCI_IRQ_MSI); 726 if (rc < 0) 727 return rc; 728 return 0; 729 } 730 731 /** 732 * genwqe_reset_interrupt_capability() - Undo genwqe_set_interrupt_capability() 733 * @cd: pointer to the device 734 */ 735 void genwqe_reset_interrupt_capability(struct genwqe_dev *cd) 736 { 737 pci_free_irq_vectors(cd->pci_dev); 738 } 739 740 /** 741 * set_reg_idx() - Fill array with data. Ignore illegal offsets. 742 * @cd: card device 743 * @r: debug register array 744 * @i: index to desired entry 745 * @m: maximum possible entries 746 * @addr: addr which is read 747 * @idx: index in debug array 748 * @val: read value 749 */ 750 static int set_reg_idx(struct genwqe_dev *cd, struct genwqe_reg *r, 751 unsigned int *i, unsigned int m, u32 addr, u32 idx, 752 u64 val) 753 { 754 if (WARN_ON_ONCE(*i >= m)) 755 return -EFAULT; 756 757 r[*i].addr = addr; 758 r[*i].idx = idx; 759 r[*i].val = val; 760 ++*i; 761 return 0; 762 } 763 764 static int set_reg(struct genwqe_dev *cd, struct genwqe_reg *r, 765 unsigned int *i, unsigned int m, u32 addr, u64 val) 766 { 767 return set_reg_idx(cd, r, i, m, addr, 0, val); 768 } 769 770 int genwqe_read_ffdc_regs(struct genwqe_dev *cd, struct genwqe_reg *regs, 771 unsigned int max_regs, int all) 772 { 773 unsigned int i, j, idx = 0; 774 u32 ufir_addr, ufec_addr, sfir_addr, sfec_addr; 775 u64 gfir, sluid, appid, ufir, ufec, sfir, sfec; 776 777 /* Global FIR */ 778 gfir = __genwqe_readq(cd, IO_SLC_CFGREG_GFIR); 779 set_reg(cd, regs, &idx, max_regs, IO_SLC_CFGREG_GFIR, gfir); 780 781 /* UnitCfg for SLU */ 782 sluid = __genwqe_readq(cd, IO_SLU_UNITCFG); /* 0x00000000 */ 783 set_reg(cd, regs, &idx, max_regs, IO_SLU_UNITCFG, sluid); 784 785 /* UnitCfg for APP */ 786 appid = __genwqe_readq(cd, IO_APP_UNITCFG); /* 0x02000000 */ 787 set_reg(cd, regs, &idx, max_regs, IO_APP_UNITCFG, appid); 788 789 /* Check all chip Units */ 790 for (i = 0; i < GENWQE_MAX_UNITS; i++) { 791 792 /* Unit FIR */ 793 ufir_addr = (i << 24) | 0x008; 794 ufir = __genwqe_readq(cd, ufir_addr); 795 set_reg(cd, regs, &idx, max_regs, ufir_addr, ufir); 796 797 /* Unit FEC */ 798 ufec_addr = (i << 24) | 0x018; 799 ufec = __genwqe_readq(cd, ufec_addr); 800 set_reg(cd, regs, &idx, max_regs, ufec_addr, ufec); 801 802 for (j = 0; j < 64; j++) { 803 /* wherever there is a primary 1, read the 2ndary */ 804 if (!all && (!(ufir & (1ull << j)))) 805 continue; 806 807 sfir_addr = (i << 24) | (0x100 + 8 * j); 808 sfir = __genwqe_readq(cd, sfir_addr); 809 set_reg(cd, regs, &idx, max_regs, sfir_addr, sfir); 810 811 sfec_addr = (i << 24) | (0x300 + 8 * j); 812 sfec = __genwqe_readq(cd, sfec_addr); 813 set_reg(cd, regs, &idx, max_regs, sfec_addr, sfec); 814 } 815 } 816 817 /* fill with invalid data until end */ 818 for (i = idx; i < max_regs; i++) { 819 regs[i].addr = 0xffffffff; 820 regs[i].val = 0xffffffffffffffffull; 821 } 822 return idx; 823 } 824 825 /** 826 * genwqe_ffdc_buff_size() - Calculates the number of dump registers 827 * @cd: genwqe device descriptor 828 * @uid: unit ID 829 */ 830 int genwqe_ffdc_buff_size(struct genwqe_dev *cd, int uid) 831 { 832 int entries = 0, ring, traps, traces, trace_entries; 833 u32 eevptr_addr, l_addr, d_len, d_type; 834 u64 eevptr, val, addr; 835 836 eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; 837 eevptr = __genwqe_readq(cd, eevptr_addr); 838 839 if ((eevptr != 0x0) && (eevptr != -1ull)) { 840 l_addr = GENWQE_UID_OFFS(uid) | eevptr; 841 842 while (1) { 843 val = __genwqe_readq(cd, l_addr); 844 845 if ((val == 0x0) || (val == -1ull)) 846 break; 847 848 /* 38:24 */ 849 d_len = (val & 0x0000007fff000000ull) >> 24; 850 851 /* 39 */ 852 d_type = (val & 0x0000008000000000ull) >> 36; 853 854 if (d_type) { /* repeat */ 855 entries += d_len; 856 } else { /* size in bytes! */ 857 entries += d_len >> 3; 858 } 859 860 l_addr += 8; 861 } 862 } 863 864 for (ring = 0; ring < 8; ring++) { 865 addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); 866 val = __genwqe_readq(cd, addr); 867 868 if ((val == 0x0ull) || (val == -1ull)) 869 continue; 870 871 traps = (val >> 24) & 0xff; 872 traces = (val >> 16) & 0xff; 873 trace_entries = val & 0xffff; 874 875 entries += traps + (traces * trace_entries); 876 } 877 return entries; 878 } 879 880 /** 881 * genwqe_ffdc_buff_read() - Implements LogoutExtendedErrorRegisters procedure 882 * @cd: genwqe device descriptor 883 * @uid: unit ID 884 * @regs: register information 885 * @max_regs: number of register entries 886 */ 887 int genwqe_ffdc_buff_read(struct genwqe_dev *cd, int uid, 888 struct genwqe_reg *regs, unsigned int max_regs) 889 { 890 int i, traps, traces, trace, trace_entries, trace_entry, ring; 891 unsigned int idx = 0; 892 u32 eevptr_addr, l_addr, d_addr, d_len, d_type; 893 u64 eevptr, e, val, addr; 894 895 eevptr_addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_ERROR_POINTER; 896 eevptr = __genwqe_readq(cd, eevptr_addr); 897 898 if ((eevptr != 0x0) && (eevptr != 0xffffffffffffffffull)) { 899 l_addr = GENWQE_UID_OFFS(uid) | eevptr; 900 while (1) { 901 e = __genwqe_readq(cd, l_addr); 902 if ((e == 0x0) || (e == 0xffffffffffffffffull)) 903 break; 904 905 d_addr = (e & 0x0000000000ffffffull); /* 23:0 */ 906 d_len = (e & 0x0000007fff000000ull) >> 24; /* 38:24 */ 907 d_type = (e & 0x0000008000000000ull) >> 36; /* 39 */ 908 d_addr |= GENWQE_UID_OFFS(uid); 909 910 if (d_type) { 911 for (i = 0; i < (int)d_len; i++) { 912 val = __genwqe_readq(cd, d_addr); 913 set_reg_idx(cd, regs, &idx, max_regs, 914 d_addr, i, val); 915 } 916 } else { 917 d_len >>= 3; /* Size in bytes! */ 918 for (i = 0; i < (int)d_len; i++, d_addr += 8) { 919 val = __genwqe_readq(cd, d_addr); 920 set_reg_idx(cd, regs, &idx, max_regs, 921 d_addr, 0, val); 922 } 923 } 924 l_addr += 8; 925 } 926 } 927 928 /* 929 * To save time, there are only 6 traces poplulated on Uid=2, 930 * Ring=1. each with iters=512. 931 */ 932 for (ring = 0; ring < 8; ring++) { /* 0 is fls, 1 is fds, 933 2...7 are ASI rings */ 934 addr = GENWQE_UID_OFFS(uid) | IO_EXTENDED_DIAG_MAP(ring); 935 val = __genwqe_readq(cd, addr); 936 937 if ((val == 0x0ull) || (val == -1ull)) 938 continue; 939 940 traps = (val >> 24) & 0xff; /* Number of Traps */ 941 traces = (val >> 16) & 0xff; /* Number of Traces */ 942 trace_entries = val & 0xffff; /* Entries per trace */ 943 944 /* Note: This is a combined loop that dumps both the traps */ 945 /* (for the trace == 0 case) as well as the traces 1 to */ 946 /* 'traces'. */ 947 for (trace = 0; trace <= traces; trace++) { 948 u32 diag_sel = 949 GENWQE_EXTENDED_DIAG_SELECTOR(ring, trace); 950 951 addr = (GENWQE_UID_OFFS(uid) | 952 IO_EXTENDED_DIAG_SELECTOR); 953 __genwqe_writeq(cd, addr, diag_sel); 954 955 for (trace_entry = 0; 956 trace_entry < (trace ? trace_entries : traps); 957 trace_entry++) { 958 addr = (GENWQE_UID_OFFS(uid) | 959 IO_EXTENDED_DIAG_READ_MBX); 960 val = __genwqe_readq(cd, addr); 961 set_reg_idx(cd, regs, &idx, max_regs, addr, 962 (diag_sel<<16) | trace_entry, val); 963 } 964 } 965 } 966 return 0; 967 } 968 969 /** 970 * genwqe_write_vreg() - Write register in virtual window 971 * @cd: genwqe device descriptor 972 * @reg: register (byte) offset within BAR 973 * @val: value to write 974 * @func: PCI virtual function 975 * 976 * Note, these registers are only accessible to the PF through the 977 * VF-window. It is not intended for the VF to access. 978 */ 979 int genwqe_write_vreg(struct genwqe_dev *cd, u32 reg, u64 val, int func) 980 { 981 __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); 982 __genwqe_writeq(cd, reg, val); 983 return 0; 984 } 985 986 /** 987 * genwqe_read_vreg() - Read register in virtual window 988 * @cd: genwqe device descriptor 989 * @reg: register (byte) offset within BAR 990 * @func: PCI virtual function 991 * 992 * Note, these registers are only accessible to the PF through the 993 * VF-window. It is not intended for the VF to access. 994 */ 995 u64 genwqe_read_vreg(struct genwqe_dev *cd, u32 reg, int func) 996 { 997 __genwqe_writeq(cd, IO_PF_SLC_VIRTUAL_WINDOW, func & 0xf); 998 return __genwqe_readq(cd, reg); 999 } 1000 1001 /** 1002 * genwqe_base_clock_frequency() - Deteremine base clock frequency of the card 1003 * @cd: genwqe device descriptor 1004 * 1005 * Note: From a design perspective it turned out to be a bad idea to 1006 * use codes here to specifiy the frequency/speed values. An old 1007 * driver cannot understand new codes and is therefore always a 1008 * problem. Better is to measure out the value or put the 1009 * speed/frequency directly into a register which is always a valid 1010 * value for old as well as for new software. 1011 * 1012 * Return: Card clock in MHz 1013 */ 1014 int genwqe_base_clock_frequency(struct genwqe_dev *cd) 1015 { 1016 u16 speed; /* MHz MHz MHz MHz */ 1017 static const int speed_grade[] = { 250, 200, 166, 175 }; 1018 1019 speed = (u16)((cd->slu_unitcfg >> 28) & 0x0full); 1020 if (speed >= ARRAY_SIZE(speed_grade)) 1021 return 0; /* illegal value */ 1022 1023 return speed_grade[speed]; 1024 } 1025 1026 /** 1027 * genwqe_stop_traps() - Stop traps 1028 * @cd: genwqe device descriptor 1029 * 1030 * Before reading out the analysis data, we need to stop the traps. 1031 */ 1032 void genwqe_stop_traps(struct genwqe_dev *cd) 1033 { 1034 __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_SET, 0xcull); 1035 } 1036 1037 /** 1038 * genwqe_start_traps() - Start traps 1039 * @cd: genwqe device descriptor 1040 * 1041 * After having read the data, we can/must enable the traps again. 1042 */ 1043 void genwqe_start_traps(struct genwqe_dev *cd) 1044 { 1045 __genwqe_writeq(cd, IO_SLC_MISC_DEBUG_CLR, 0xcull); 1046 1047 if (genwqe_need_err_masking(cd)) 1048 __genwqe_writeq(cd, IO_SLC_MISC_DEBUG, 0x0aull); 1049 } 1050