1 // SPDX-License-Identifier: GPL-2.0+ 2 // Copyright 2017 IBM Corp. 3 #include <asm/pnv-ocxl.h> 4 #include <asm/opal.h> 5 #include <misc/ocxl-config.h> 6 #include "pci.h" 7 8 #define PNV_OCXL_TL_P9_RECV_CAP 0x000000000000000Full 9 #define PNV_OCXL_ACTAG_MAX 64 10 /* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */ 11 #define PNV_OCXL_PASID_BITS 15 12 #define PNV_OCXL_PASID_MAX ((1 << PNV_OCXL_PASID_BITS) - 1) 13 14 #define AFU_PRESENT (1 << 31) 15 #define AFU_INDEX_MASK 0x3F000000 16 #define AFU_INDEX_SHIFT 24 17 #define ACTAG_MASK 0xFFF 18 19 20 struct actag_range { 21 u16 start; 22 u16 count; 23 }; 24 25 struct npu_link { 26 struct list_head list; 27 int domain; 28 int bus; 29 int dev; 30 u16 fn_desired_actags[8]; 31 struct actag_range fn_actags[8]; 32 bool assignment_done; 33 }; 34 static struct list_head links_list = LIST_HEAD_INIT(links_list); 35 static DEFINE_MUTEX(links_list_lock); 36 37 38 /* 39 * opencapi actags handling: 40 * 41 * When sending commands, the opencapi device references the memory 42 * context it's targeting with an 'actag', which is really an alias 43 * for a (BDF, pasid) combination. When it receives a command, the NPU 44 * must do a lookup of the actag to identify the memory context. The 45 * hardware supports a finite number of actags per link (64 for 46 * POWER9). 47 * 48 * The device can carry multiple functions, and each function can have 49 * multiple AFUs. Each AFU advertises in its config space the number 50 * of desired actags. The host must configure in the config space of 51 * the AFU how many actags the AFU is really allowed to use (which can 52 * be less than what the AFU desires). 53 * 54 * When a PCI function is probed by the driver, it has no visibility 55 * about the other PCI functions and how many actags they'd like, 56 * which makes it impossible to distribute actags fairly among AFUs. 57 * 58 * Unfortunately, the only way to know how many actags a function 59 * desires is by looking at the data for each AFU in the config space 60 * and add them up. Similarly, the only way to know how many actags 61 * all the functions of the physical device desire is by adding the 62 * previously computed function counts. Then we can match that against 63 * what the hardware supports. 64 * 65 * To get a comprehensive view, we use a 'pci fixup': at the end of 66 * PCI enumeration, each function counts how many actags its AFUs 67 * desire and we save it in a 'npu_link' structure, shared between all 68 * the PCI functions of a same device. Therefore, when the first 69 * function is probed by the driver, we can get an idea of the total 70 * count of desired actags for the device, and assign the actags to 71 * the AFUs, by pro-rating if needed. 72 */ 73 74 static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos) 75 { 76 int vsec = pos; 77 u16 vendor, id; 78 79 while ((vsec = pci_find_next_ext_capability(dev, vsec, 80 OCXL_EXT_CAP_ID_DVSEC))) { 81 pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET, 82 &vendor); 83 pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id); 84 if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id) 85 return vsec; 86 } 87 return 0; 88 } 89 90 static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx) 91 { 92 int vsec = 0; 93 u8 idx; 94 95 while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID, 96 vsec))) { 97 pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX, 98 &idx); 99 if (idx == afu_idx) 100 return vsec; 101 } 102 return 0; 103 } 104 105 static int get_max_afu_index(struct pci_dev *dev, int *afu_idx) 106 { 107 int pos; 108 u32 val; 109 110 pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0); 111 if (!pos) 112 return -ESRCH; 113 114 pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val); 115 if (val & AFU_PRESENT) 116 *afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT; 117 else 118 *afu_idx = -1; 119 return 0; 120 } 121 122 static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag) 123 { 124 int pos; 125 u16 actag_sup; 126 127 pos = find_dvsec_afu_ctrl(dev, afu_idx); 128 if (!pos) 129 return -ESRCH; 130 131 pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP, 132 &actag_sup); 133 *actag = actag_sup & ACTAG_MASK; 134 return 0; 135 } 136 137 static struct npu_link *find_link(struct pci_dev *dev) 138 { 139 struct npu_link *link; 140 141 list_for_each_entry(link, &links_list, list) { 142 /* The functions of a device all share the same link */ 143 if (link->domain == pci_domain_nr(dev->bus) && 144 link->bus == dev->bus->number && 145 link->dev == PCI_SLOT(dev->devfn)) { 146 return link; 147 } 148 } 149 150 /* link doesn't exist yet. Allocate one */ 151 link = kzalloc(sizeof(struct npu_link), GFP_KERNEL); 152 if (!link) 153 return NULL; 154 link->domain = pci_domain_nr(dev->bus); 155 link->bus = dev->bus->number; 156 link->dev = PCI_SLOT(dev->devfn); 157 list_add(&link->list, &links_list); 158 return link; 159 } 160 161 static void pnv_ocxl_fixup_actag(struct pci_dev *dev) 162 { 163 struct pci_controller *hose = pci_bus_to_host(dev->bus); 164 struct pnv_phb *phb = hose->private_data; 165 struct npu_link *link; 166 int rc, afu_idx = -1, i, actag; 167 168 if (!machine_is(powernv)) 169 return; 170 171 if (phb->type != PNV_PHB_NPU_OCAPI) 172 return; 173 174 mutex_lock(&links_list_lock); 175 176 link = find_link(dev); 177 if (!link) { 178 dev_warn(&dev->dev, "couldn't update actag information\n"); 179 mutex_unlock(&links_list_lock); 180 return; 181 } 182 183 /* 184 * Check how many actags are desired for the AFUs under that 185 * function and add it to the count for the link 186 */ 187 rc = get_max_afu_index(dev, &afu_idx); 188 if (rc) { 189 /* Most likely an invalid config space */ 190 dev_dbg(&dev->dev, "couldn't find AFU information\n"); 191 afu_idx = -1; 192 } 193 194 link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0; 195 for (i = 0; i <= afu_idx; i++) { 196 /* 197 * AFU index 'holes' are allowed. So don't fail if we 198 * can't read the actag info for an index 199 */ 200 rc = get_actag_count(dev, i, &actag); 201 if (rc) 202 continue; 203 link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag; 204 } 205 dev_dbg(&dev->dev, "total actags for function: %d\n", 206 link->fn_desired_actags[PCI_FUNC(dev->devfn)]); 207 208 mutex_unlock(&links_list_lock); 209 } 210 DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag); 211 212 static u16 assign_fn_actags(u16 desired, u16 total) 213 { 214 u16 count; 215 216 if (total <= PNV_OCXL_ACTAG_MAX) 217 count = desired; 218 else 219 count = PNV_OCXL_ACTAG_MAX * desired / total; 220 221 return count; 222 } 223 224 static void assign_actags(struct npu_link *link) 225 { 226 u16 actag_count, range_start = 0, total_desired = 0; 227 int i; 228 229 for (i = 0; i < 8; i++) 230 total_desired += link->fn_desired_actags[i]; 231 232 for (i = 0; i < 8; i++) { 233 if (link->fn_desired_actags[i]) { 234 actag_count = assign_fn_actags( 235 link->fn_desired_actags[i], 236 total_desired); 237 link->fn_actags[i].start = range_start; 238 link->fn_actags[i].count = actag_count; 239 range_start += actag_count; 240 WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX); 241 } 242 pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n", 243 link->domain, link->bus, link->dev, i, 244 link->fn_actags[i].start, link->fn_actags[i].count, 245 link->fn_desired_actags[i]); 246 } 247 link->assignment_done = true; 248 } 249 250 int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled, 251 u16 *supported) 252 { 253 struct npu_link *link; 254 255 mutex_lock(&links_list_lock); 256 257 link = find_link(dev); 258 if (!link) { 259 dev_err(&dev->dev, "actag information not found\n"); 260 mutex_unlock(&links_list_lock); 261 return -ENODEV; 262 } 263 /* 264 * On p9, we only have 64 actags per link, so they must be 265 * shared by all the functions of the same adapter. We counted 266 * the desired actag counts during PCI enumeration, so that we 267 * can allocate a pro-rated number of actags to each function. 268 */ 269 if (!link->assignment_done) 270 assign_actags(link); 271 272 *base = link->fn_actags[PCI_FUNC(dev->devfn)].start; 273 *enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count; 274 *supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)]; 275 276 mutex_unlock(&links_list_lock); 277 return 0; 278 } 279 EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag); 280 281 int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count) 282 { 283 struct npu_link *link; 284 int i, rc = -EINVAL; 285 286 /* 287 * The number of PASIDs (process address space ID) which can 288 * be used by a function depends on how many functions exist 289 * on the device. The NPU needs to be configured to know how 290 * many bits are available to PASIDs and how many are to be 291 * used by the function BDF indentifier. 292 * 293 * We only support one AFU-carrying function for now. 294 */ 295 mutex_lock(&links_list_lock); 296 297 link = find_link(dev); 298 if (!link) { 299 dev_err(&dev->dev, "actag information not found\n"); 300 mutex_unlock(&links_list_lock); 301 return -ENODEV; 302 } 303 304 for (i = 0; i < 8; i++) 305 if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) { 306 *count = PNV_OCXL_PASID_MAX; 307 rc = 0; 308 break; 309 } 310 311 mutex_unlock(&links_list_lock); 312 dev_dbg(&dev->dev, "%d PASIDs available for function\n", 313 rc ? 0 : *count); 314 return rc; 315 } 316 EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count); 317 318 static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf) 319 { 320 int shift, idx; 321 322 WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE); 323 idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2; 324 shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2)); 325 buf[idx] |= rate << shift; 326 } 327 328 int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap, 329 char *rate_buf, int rate_buf_size) 330 { 331 if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) 332 return -EINVAL; 333 /* 334 * The TL capabilities are a characteristic of the NPU, so 335 * we go with hard-coded values. 336 * 337 * The receiving rate of each template is encoded on 4 bits. 338 * 339 * On P9: 340 * - templates 0 -> 3 are supported 341 * - templates 0, 1 and 3 have a 0 receiving rate 342 * - template 2 has receiving rate of 1 (extra cycle) 343 */ 344 memset(rate_buf, 0, rate_buf_size); 345 set_templ_rate(2, 1, rate_buf); 346 *cap = PNV_OCXL_TL_P9_RECV_CAP; 347 return 0; 348 } 349 EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap); 350 351 int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap, 352 uint64_t rate_buf_phys, int rate_buf_size) 353 { 354 struct pci_controller *hose = pci_bus_to_host(dev->bus); 355 struct pnv_phb *phb = hose->private_data; 356 int rc; 357 358 if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE) 359 return -EINVAL; 360 361 rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap, 362 rate_buf_phys, rate_buf_size); 363 if (rc) { 364 dev_err(&dev->dev, "Can't configure host TL: %d\n", rc); 365 return -EINVAL; 366 } 367 return 0; 368 } 369 EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf); 370 371 int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq) 372 { 373 int rc; 374 375 rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq); 376 if (rc) { 377 dev_err(&dev->dev, 378 "Can't get translation interrupt for device\n"); 379 return rc; 380 } 381 return 0; 382 } 383 EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq); 384 385 void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar, 386 void __iomem *tfc, void __iomem *pe_handle) 387 { 388 iounmap(dsisr); 389 iounmap(dar); 390 iounmap(tfc); 391 iounmap(pe_handle); 392 } 393 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs); 394 395 int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr, 396 void __iomem **dar, void __iomem **tfc, 397 void __iomem **pe_handle) 398 { 399 u64 reg; 400 int i, j, rc = 0; 401 void __iomem *regs[4]; 402 403 /* 404 * opal stores the mmio addresses of the DSISR, DAR, TFC and 405 * PE_HANDLE registers in a device tree property, in that 406 * order 407 */ 408 for (i = 0; i < 4; i++) { 409 rc = of_property_read_u64_index(dev->dev.of_node, 410 "ibm,opal-xsl-mmio", i, ®); 411 if (rc) 412 break; 413 regs[i] = ioremap(reg, 8); 414 if (!regs[i]) { 415 rc = -EINVAL; 416 break; 417 } 418 } 419 if (rc) { 420 dev_err(&dev->dev, "Can't map translation mmio registers\n"); 421 for (j = i - 1; j >= 0; j--) 422 iounmap(regs[j]); 423 } else { 424 *dsisr = regs[0]; 425 *dar = regs[1]; 426 *tfc = regs[2]; 427 *pe_handle = regs[3]; 428 } 429 return rc; 430 } 431 EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs); 432 433 struct spa_data { 434 u64 phb_opal_id; 435 u32 bdfn; 436 }; 437 438 int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask, 439 void **platform_data) 440 { 441 struct pci_controller *hose = pci_bus_to_host(dev->bus); 442 struct pnv_phb *phb = hose->private_data; 443 struct spa_data *data; 444 u32 bdfn; 445 int rc; 446 447 data = kzalloc(sizeof(*data), GFP_KERNEL); 448 if (!data) 449 return -ENOMEM; 450 451 bdfn = (dev->bus->number << 8) | dev->devfn; 452 rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem), 453 PE_mask); 454 if (rc) { 455 dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc); 456 kfree(data); 457 return rc; 458 } 459 data->phb_opal_id = phb->opal_id; 460 data->bdfn = bdfn; 461 *platform_data = (void *) data; 462 return 0; 463 } 464 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup); 465 466 void pnv_ocxl_spa_release(void *platform_data) 467 { 468 struct spa_data *data = (struct spa_data *) platform_data; 469 int rc; 470 471 rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0); 472 WARN_ON(rc); 473 kfree(data); 474 } 475 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release); 476 477 int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle) 478 { 479 struct spa_data *data = (struct spa_data *) platform_data; 480 int rc; 481 482 rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle); 483 return rc; 484 } 485 EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache); 486 487 int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid, 488 uint64_t lpcr, void __iomem **arva) 489 { 490 struct pci_controller *hose = pci_bus_to_host(dev->bus); 491 struct pnv_phb *phb = hose->private_data; 492 u64 mmio_atsd; 493 int rc; 494 495 /* ATSD physical address. 496 * ATSD LAUNCH register: write access initiates a shoot down to 497 * initiate the TLB Invalidate command. 498 */ 499 rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd", 500 0, &mmio_atsd); 501 if (rc) { 502 dev_info(&dev->dev, "No available ATSD found\n"); 503 return rc; 504 } 505 506 /* Assign a register set to a Logical Partition and MMIO ATSD 507 * LPARID register to the required value. 508 */ 509 rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev), 510 lparid, lpcr); 511 if (rc) { 512 dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc); 513 return rc; 514 } 515 516 *arva = ioremap(mmio_atsd, 24); 517 if (!(*arva)) { 518 dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd); 519 rc = -ENOMEM; 520 } 521 522 return rc; 523 } 524 EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar); 525 526 void pnv_ocxl_unmap_lpar(void __iomem *arva) 527 { 528 iounmap(arva); 529 } 530 EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar); 531 532 void pnv_ocxl_tlb_invalidate(void __iomem *arva, 533 unsigned long pid, 534 unsigned long addr, 535 unsigned long page_size) 536 { 537 unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT); 538 u64 val = 0ull; 539 int pend; 540 u8 size; 541 542 if (!(arva)) 543 return; 544 545 if (addr) { 546 /* load Abbreviated Virtual Address register with 547 * the necessary value 548 */ 549 val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51)); 550 out_be64(arva + PNV_OCXL_ATSD_AVA, val); 551 } 552 553 /* Write access initiates a shoot down to initiate the 554 * TLB Invalidate command 555 */ 556 val = PNV_OCXL_ATSD_LNCH_R; 557 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10); 558 if (addr) 559 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00); 560 else { 561 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01); 562 val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON; 563 } 564 val |= PNV_OCXL_ATSD_LNCH_PRS; 565 /* Actual Page Size to be invalidated 566 * 000 4KB 567 * 101 64KB 568 * 001 2MB 569 * 010 1GB 570 */ 571 size = 0b101; 572 if (page_size == 0x1000) 573 size = 0b000; 574 if (page_size == 0x200000) 575 size = 0b001; 576 if (page_size == 0x40000000) 577 size = 0b010; 578 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size); 579 val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid); 580 out_be64(arva + PNV_OCXL_ATSD_LNCH, val); 581 582 /* Poll the ATSD status register to determine when the 583 * TLB Invalidate has been completed. 584 */ 585 val = in_be64(arva + PNV_OCXL_ATSD_STAT); 586 pend = val >> 63; 587 588 while (pend) { 589 if (time_after_eq(jiffies, timeout)) { 590 pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n", 591 __func__, val, pid); 592 return; 593 } 594 cpu_relax(); 595 val = in_be64(arva + PNV_OCXL_ATSD_STAT); 596 pend = val >> 63; 597 } 598 } 599 EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate); 600