// SPDX-License-Identifier: GPL-2.0+
// Copyright 2017 IBM Corp.
#include <asm/pnv-ocxl.h>
#include <asm/opal.h>
#include <asm/xive.h>
#include <misc/ocxl-config.h>
#include "pci.h"

/* Template receive capabilities advertised for the P9 NPU (templates 0-3) */
#define PNV_OCXL_TL_P9_RECV_CAP	0x000000000000000Full
/* Hardware limit: number of actags available per link on P9 */
#define PNV_OCXL_ACTAG_MAX	64
/* PASIDs are 20-bit, but on P9, NPU can only handle 15 bits */
#define PNV_OCXL_PASID_BITS	15
#define PNV_OCXL_PASID_MAX	((1 << PNV_OCXL_PASID_BITS) - 1)

/* Fields of the AFU info word read at OCXL_DVSEC_FUNC_OFF_INDEX */
#define AFU_PRESENT (1 << 31)
#define AFU_INDEX_MASK 0x3F000000
#define AFU_INDEX_SHIFT 24
/* Low 12 bits of OCXL_DVSEC_AFU_CTRL_ACTAG_SUP hold the supported count */
#define ACTAG_MASK 0xFFF


/* A contiguous range of actags assigned to one PCI function */
struct actag_range {
	u16 start;
	u16 count;
};

/*
 * Per-device (domain/bus/slot) actag bookkeeping, shared by all the
 * PCI functions of the device. Arrays are indexed by function number
 * (0-7).
 */
struct npu_link {
	struct list_head list;
	int domain;
	int bus;
	int dev;
	u16 fn_desired_actags[8];	/* per-function desired actag count */
	struct actag_range fn_actags[8];	/* per-function assigned range */
	bool assignment_done;	/* actags distributed on first get_actag() */
};
/* Both protected by links_list_lock */
static struct list_head links_list = LIST_HEAD_INIT(links_list);
static DEFINE_MUTEX(links_list_lock);


/*
 * opencapi actags handling:
 *
 * When sending commands, the opencapi device references the memory
 * context it's targeting with an 'actag', which is really an alias
 * for a (BDF, pasid) combination. When it receives a command, the NPU
 * must do a lookup of the actag to identify the memory context. The
 * hardware supports a finite number of actags per link (64 for
 * POWER9).
 *
 * The device can carry multiple functions, and each function can have
 * multiple AFUs. Each AFU advertises in its config space the number
 * of desired actags. The host must configure in the config space of
 * the AFU how many actags the AFU is really allowed to use (which can
 * be less than what the AFU desires).
 *
 * When a PCI function is probed by the driver, it has no visibility
 * about the other PCI functions and how many actags they'd like,
 * which makes it impossible to distribute actags fairly among AFUs.
 *
 * Unfortunately, the only way to know how many actags a function
 * desires is by looking at the data for each AFU in the config space
 * and add them up. Similarly, the only way to know how many actags
 * all the functions of the physical device desire is by adding the
 * previously computed function counts. Then we can match that against
 * what the hardware supports.
 *
 * To get a comprehensive view, we use a 'pci fixup': at the end of
 * PCI enumeration, each function counts how many actags its AFUs
 * desire and we save it in a 'npu_link' structure, shared between all
 * the PCI functions of a same device. Therefore, when the first
 * function is probed by the driver, we can get an idea of the total
 * count of desired actags for the device, and assign the actags to
 * the AFUs, by pro-rating if needed.
 */

/*
 * Scan the extended capability list starting after 'pos' for an IBM
 * DVSEC with the given DVSEC ID. Returns the config space offset of
 * the DVSEC, or 0 if not found.
 */
static int find_dvsec_from_pos(struct pci_dev *dev, int dvsec_id, int pos)
{
	int vsec = pos;
	u16 vendor, id;

	while ((vsec = pci_find_next_ext_capability(dev, vsec,
						    OCXL_EXT_CAP_ID_DVSEC))) {
		pci_read_config_word(dev, vsec + OCXL_DVSEC_VENDOR_OFFSET,
				     &vendor);
		pci_read_config_word(dev, vsec + OCXL_DVSEC_ID_OFFSET, &id);
		if (vendor == PCI_VENDOR_ID_IBM && id == dvsec_id)
			return vsec;
	}
	return 0;
}

/*
 * Find the AFU control DVSEC for the AFU with the given index.
 * A function may carry one such DVSEC per AFU. Returns the config
 * space offset, or 0 if not found.
 */
static int find_dvsec_afu_ctrl(struct pci_dev *dev, u8 afu_idx)
{
	int vsec = 0;
	u8 idx;

	while ((vsec = find_dvsec_from_pos(dev, OCXL_DVSEC_AFU_CTRL_ID,
					   vsec))) {
		pci_read_config_byte(dev, vsec + OCXL_DVSEC_AFU_CTRL_AFU_IDX,
				     &idx);
		if (idx == afu_idx)
			return vsec;
	}
	return 0;
}

/*
 * Read the highest AFU index present on the function from the
 * function DVSEC. Sets *afu_idx to -1 if no AFU is present. Returns
 * -ESRCH if the function DVSEC cannot be found (likely an invalid
 * config space).
 */
static int get_max_afu_index(struct pci_dev *dev, int *afu_idx)
{
	int pos;
	u32 val;

	pos = find_dvsec_from_pos(dev, OCXL_DVSEC_FUNC_ID, 0);
	if (!pos)
		return -ESRCH;

	pci_read_config_dword(dev, pos + OCXL_DVSEC_FUNC_OFF_INDEX, &val);
	if (val & AFU_PRESENT)
		*afu_idx = (val & AFU_INDEX_MASK) >> AFU_INDEX_SHIFT;
	else
		*afu_idx = -1;
	return 0;
}

/*
 * Read how many actags the AFU with the given index desires, from its
 * AFU control DVSEC. Returns -ESRCH if the DVSEC is not found (AFU
 * index 'holes' are allowed, so callers may skip such indexes).
 */
static int get_actag_count(struct pci_dev *dev, int afu_idx, int *actag)
{
	int pos;
	u16 actag_sup;

	pos = find_dvsec_afu_ctrl(dev, afu_idx);
	if (!pos)
		return -ESRCH;

	pci_read_config_word(dev, pos + OCXL_DVSEC_AFU_CTRL_ACTAG_SUP,
			     &actag_sup);
	*actag = actag_sup & ACTAG_MASK;
	return 0;
}

/*
 * Find the npu_link for the device's (domain, bus, slot), allocating
 * and registering a new zeroed one if none exists yet. Returns NULL
 * on allocation failure. Caller must hold links_list_lock.
 */
static struct npu_link *find_link(struct pci_dev *dev)
{
	struct npu_link *link;

	list_for_each_entry(link, &links_list, list) {
		/* The functions of a device all share the same link */
		if (link->domain == pci_domain_nr(dev->bus) &&
		    link->bus == dev->bus->number &&
		    link->dev == PCI_SLOT(dev->devfn)) {
			return link;
		}
	}

	/* link doesn't exist yet. Allocate one */
	link = kzalloc(sizeof(struct npu_link), GFP_KERNEL);
	if (!link)
		return NULL;
	link->domain = pci_domain_nr(dev->bus);
	link->bus = dev->bus->number;
	link->dev = PCI_SLOT(dev->devfn);
	list_add(&link->list, &links_list);
	return link;
}

/*
 * PCI fixup run for every function at enumeration time: sum the
 * desired actag counts of all the AFUs of the function and record the
 * total in the shared npu_link, so that the totals for the whole
 * device are known by the time a driver probes any function.
 */
static void pnv_ocxl_fixup_actag(struct pci_dev *dev)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct npu_link *link;
	int rc, afu_idx = -1, i, actag;

	if (!machine_is(powernv))
		return;

	/* Only relevant for devices behind an opencapi NPU PHB */
	if (phb->type != PNV_PHB_NPU_OCAPI)
		return;

	mutex_lock(&links_list_lock);

	link = find_link(dev);
	if (!link) {
		dev_warn(&dev->dev, "couldn't update actag information\n");
		mutex_unlock(&links_list_lock);
		return;
	}

	/*
	 * Check how many actags are desired for the AFUs under that
	 * function and add it to the count for the link
	 */
	rc = get_max_afu_index(dev, &afu_idx);
	if (rc) {
		/* Most likely an invalid config space */
		dev_dbg(&dev->dev, "couldn't find AFU information\n");
		afu_idx = -1;
	}

	link->fn_desired_actags[PCI_FUNC(dev->devfn)] = 0;
	for (i = 0; i <= afu_idx; i++) {
		/*
		 * AFU index 'holes' are allowed. So don't fail if we
		 * can't read the actag info for an index
		 */
		rc = get_actag_count(dev, i, &actag);
		if (rc)
			continue;
		link->fn_desired_actags[PCI_FUNC(dev->devfn)] += actag;
	}
	dev_dbg(&dev->dev, "total actags for function: %d\n",
		link->fn_desired_actags[PCI_FUNC(dev->devfn)]);

	mutex_unlock(&links_list_lock);
}
DECLARE_PCI_FIXUP_HEADER(PCI_ANY_ID, PCI_ANY_ID, pnv_ocxl_fixup_actag);

/*
 * Number of actags granted to a function: everything it desires if
 * the whole device fits in the hardware limit, otherwise a pro-rated
 * share (integer division, so leftovers may go unassigned).
 */
static u16 assign_fn_actags(u16 desired, u16 total)
{
	u16 count;

	if (total <= PNV_OCXL_ACTAG_MAX)
		count = desired;
	else
		count = PNV_OCXL_ACTAG_MAX * desired / total;

	return count;
}

/*
 * Distribute the link's actags among the functions that want some,
 * assigning each a contiguous [start, start + count) range. Called
 * once per link, under links_list_lock.
 */
static void assign_actags(struct npu_link *link)
{
	u16 actag_count, range_start = 0, total_desired = 0;
	int i;

	for (i = 0; i < 8; i++)
		total_desired += link->fn_desired_actags[i];

	for (i = 0; i < 8; i++) {
		if (link->fn_desired_actags[i]) {
			actag_count = assign_fn_actags(
				link->fn_desired_actags[i],
				total_desired);
			link->fn_actags[i].start = range_start;
			link->fn_actags[i].count = actag_count;
			range_start += actag_count;
			/*
			 * NOTE(review): range_start is one past the last
			 * assigned actag, so it may legitimately reach
			 * PNV_OCXL_ACTAG_MAX when exactly 64 actags are
			 * assigned; '>=' then warns spuriously — '>' looks
			 * like the intended check. Confirm before changing.
			 */
			WARN_ON(range_start >= PNV_OCXL_ACTAG_MAX);
		}
		pr_debug("link %x:%x:%x fct %d actags: start=%d count=%d (desired=%d)\n",
			 link->domain, link->bus, link->dev, i,
			 link->fn_actags[i].start, link->fn_actags[i].count,
			 link->fn_desired_actags[i]);
	}
	link->assignment_done = true;
}

/*
 * Return the actag range for the function: *base is the first actag,
 * *enabled how many it may use, *supported how many it desired. The
 * per-link assignment is computed lazily on the first call. Returns
 * -ENODEV if no link information is available.
 */
int pnv_ocxl_get_actag(struct pci_dev *dev, u16 *base, u16 *enabled,
		       u16 *supported)
{
	struct npu_link *link;

	mutex_lock(&links_list_lock);

	link = find_link(dev);
	if (!link) {
		dev_err(&dev->dev, "actag information not found\n");
		mutex_unlock(&links_list_lock);
		return -ENODEV;
	}
	/*
	 * On p9, we only have 64 actags per link, so they must be
	 * shared by all the functions of the same adapter. We counted
	 * the desired actag counts during PCI enumeration, so that we
	 * can allocate a pro-rated number of actags to each function.
	 */
	if (!link->assignment_done)
		assign_actags(link);

	*base = link->fn_actags[PCI_FUNC(dev->devfn)].start;
	*enabled = link->fn_actags[PCI_FUNC(dev->devfn)].count;
	*supported = link->fn_desired_actags[PCI_FUNC(dev->devfn)];

	mutex_unlock(&links_list_lock);
	return 0;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_get_actag);

/*
 * Report how many PASIDs the function may use. Only the (single)
 * AFU-carrying function of a device gets PASIDs; returns -EINVAL for
 * any other function, -ENODEV if no link information is available.
 */
int pnv_ocxl_get_pasid_count(struct pci_dev *dev, int *count)
{
	struct npu_link *link;
	int i, rc = -EINVAL;

	/*
	 * The number of PASIDs (process address space ID) which can
	 * be used by a function depends on how many functions exist
	 * on the device. The NPU needs to be configured to know how
	 * many bits are available to PASIDs and how many are to be
	 * used by the function BDF identifier.
	 *
	 * We only support one AFU-carrying function for now.
	 */
	mutex_lock(&links_list_lock);

	link = find_link(dev);
	if (!link) {
		dev_err(&dev->dev, "actag information not found\n");
		mutex_unlock(&links_list_lock);
		return -ENODEV;
	}

	for (i = 0; i < 8; i++)
		if (link->fn_desired_actags[i] && (i == PCI_FUNC(dev->devfn))) {
			*count = PNV_OCXL_PASID_MAX;
			rc = 0;
			break;
		}

	mutex_unlock(&links_list_lock);
	dev_dbg(&dev->dev, "%d PASIDs available for function\n",
		rc ? 0 : *count);
	return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_get_pasid_count);

/*
 * Store a template's 4-bit receive rate in the TL rate buffer. Rates
 * are packed two per byte, highest template first (template 0 ends up
 * in the low nibble of the last byte).
 */
static void set_templ_rate(unsigned int templ, unsigned int rate, char *buf)
{
	int shift, idx;

	WARN_ON(templ > PNV_OCXL_TL_MAX_TEMPLATE);
	idx = (PNV_OCXL_TL_MAX_TEMPLATE - templ) / 2;
	shift = 4 * (1 - ((PNV_OCXL_TL_MAX_TEMPLATE - templ) % 2));
	buf[idx] |= rate << shift;
}

/*
 * Report the transaction layer capabilities of the host: the bitmap
 * of supported receive templates in *cap, and the per-template
 * receive rates in rate_buf. rate_buf_size must be
 * PNV_OCXL_TL_RATE_BUF_SIZE.
 */
int pnv_ocxl_get_tl_cap(struct pci_dev *dev, long *cap,
			char *rate_buf, int rate_buf_size)
{
	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
		return -EINVAL;
	/*
	 * The TL capabilities are a characteristic of the NPU, so
	 * we go with hard-coded values.
	 *
	 * The receiving rate of each template is encoded on 4 bits.
	 *
	 * On P9:
	 * - templates 0 -> 3 are supported
	 * - templates 0, 1 and 3 have a 0 receiving rate
	 * - template 2 has receiving rate of 1 (extra cycle)
	 */
	memset(rate_buf, 0, rate_buf_size);
	set_templ_rate(2, 1, rate_buf);
	*cap = PNV_OCXL_TL_P9_RECV_CAP;
	return 0;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_get_tl_cap);

/*
 * Ask firmware to configure the host transaction layer for the
 * device, from the capability bitmap and the rate buffer (passed by
 * physical address) negotiated with the device.
 */
int pnv_ocxl_set_tl_conf(struct pci_dev *dev, long cap,
			 uint64_t rate_buf_phys, int rate_buf_size)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	int rc;

	if (rate_buf_size != PNV_OCXL_TL_RATE_BUF_SIZE)
		return -EINVAL;

	rc = opal_npu_tl_set(phb->opal_id, dev->devfn, cap,
			     rate_buf_phys, rate_buf_size);
	if (rc) {
		dev_err(&dev->dev, "Can't configure host TL: %d\n", rc);
		return -EINVAL;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_set_tl_conf);

/*
 * Read the hardware irq number used by the XSL for translation
 * faults, from the device tree property set up by firmware.
 */
int pnv_ocxl_get_xsl_irq(struct pci_dev *dev, int *hwirq)
{
	int rc;

	rc = of_property_read_u32(dev->dev.of_node, "ibm,opal-xsl-irq", hwirq);
	if (rc) {
		dev_err(&dev->dev,
			"Can't get translation interrupt for device\n");
		return rc;
	}
	return 0;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_get_xsl_irq);

/* Undo pnv_ocxl_map_xsl_regs(): unmap the four XSL fault registers */
void pnv_ocxl_unmap_xsl_regs(void __iomem *dsisr, void __iomem *dar,
			     void __iomem *tfc, void __iomem *pe_handle)
{
	iounmap(dsisr);
	iounmap(dar);
	iounmap(tfc);
	iounmap(pe_handle);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_xsl_regs);

/*
 * Map the XSL translation-fault registers (DSISR, DAR, TFC,
 * PE_HANDLE) whose mmio addresses firmware exposes in the
 * "ibm,opal-xsl-mmio" device tree property. On failure, any
 * already-mapped register is unmapped before returning the error.
 */
int pnv_ocxl_map_xsl_regs(struct pci_dev *dev, void __iomem **dsisr,
			  void __iomem **dar, void __iomem **tfc,
			  void __iomem **pe_handle)
{
	u64 reg;
	int i, j, rc = 0;
	void __iomem *regs[4];

	/*
	 * opal stores the mmio addresses of the DSISR, DAR, TFC and
	 * PE_HANDLE registers in a device tree property, in that
	 * order
	 */
	for (i = 0; i < 4; i++) {
		rc = of_property_read_u64_index(dev->dev.of_node,
						"ibm,opal-xsl-mmio", i, &reg);
		if (rc)
			break;
		regs[i] = ioremap(reg, 8);
		if (!regs[i]) {
			rc = -EINVAL;
			break;
		}
	}
	if (rc) {
		dev_err(&dev->dev, "Can't map translation mmio registers\n");
		/* unwind the mappings done so far */
		for (j = i - 1; j >= 0; j--)
			iounmap(regs[j]);
	} else {
		*dsisr = regs[0];
		*dar = regs[1];
		*tfc = regs[2];
		*pe_handle = regs[3];
	}
	return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_map_xsl_regs);

/*
 * Opaque handle returned to the caller through *platform_data by
 * pnv_ocxl_spa_setup(); identifies the (PHB, BDF) pair for the
 * follow-up SPA calls below.
 */
struct spa_data {
	u64 phb_opal_id;
	u32 bdfn;
};

/*
 * Register the Shared Process Area (spa_mem, caller-allocated) with
 * firmware for the device, along with the mask defining which fault
 * handle bits index a PE. On success, returns an allocated handle
 * through *platform_data, owned by the caller and released by
 * pnv_ocxl_spa_release().
 */
int pnv_ocxl_spa_setup(struct pci_dev *dev, void *spa_mem, int PE_mask,
		       void **platform_data)
{
	struct pci_controller *hose = pci_bus_to_host(dev->bus);
	struct pnv_phb *phb = hose->private_data;
	struct spa_data *data;
	u32 bdfn;
	int rc;

	data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		return -ENOMEM;

	bdfn = (dev->bus->number << 8) | dev->devfn;
	rc = opal_npu_spa_setup(phb->opal_id, bdfn, virt_to_phys(spa_mem),
				PE_mask);
	if (rc) {
		dev_err(&dev->dev, "Can't setup Shared Process Area: %d\n", rc);
		kfree(data);
		return rc;
	}
	data->phb_opal_id = phb->opal_id;
	data->bdfn = bdfn;
	*platform_data = (void *) data;
	return 0;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_setup);

/*
 * Deregister the SPA with firmware (by setting a NULL SPA address)
 * and free the handle allocated by pnv_ocxl_spa_setup().
 */
void pnv_ocxl_spa_release(void *platform_data)
{
	struct spa_data *data = (struct spa_data *) platform_data;
	int rc;

	rc = opal_npu_spa_setup(data->phb_opal_id, data->bdfn, 0, 0);
	WARN_ON(rc);
	kfree(data);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_release);

/*
 * Ask firmware to invalidate the NPU's cached copy of the SPA entry
 * for the given PE handle, after the entry has been modified.
 */
int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
{
	struct spa_data *data = (struct spa_data *) platform_data;
	int rc;

	rc = opal_npu_spa_clear_cache(data->phb_opal_id, data->bdfn, pe_handle);
	return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);

/*
 * Allocate a XIVE interrupt and return its hardware irq number and
 * the (cpu-endian) physical address of its trigger page. Returns
 * -ENOENT if no irq is available or it has no trigger page, in which
 * case the irq is freed again.
 */
int pnv_ocxl_alloc_xive_irq(u32 *irq, u64 *trigger_addr)
{
	__be64 flags, trigger_page;
	s64 rc;
	u32 hwirq;

	hwirq = xive_native_alloc_irq();
	if (!hwirq)
		return -ENOENT;

	rc = opal_xive_get_irq_info(hwirq, &flags, NULL, &trigger_page, NULL,
				    NULL);
	if (rc || !trigger_page) {
		xive_native_free_irq(hwirq);
		return -ENOENT;
	}
	*irq = hwirq;
	*trigger_addr = be64_to_cpu(trigger_page);
	return 0;

}
EXPORT_SYMBOL_GPL(pnv_ocxl_alloc_xive_irq);

/* Free an interrupt allocated with pnv_ocxl_alloc_xive_irq() */
void pnv_ocxl_free_xive_irq(u32 irq)
{
	xive_native_free_irq(irq);
}
EXPORT_SYMBOL_GPL(pnv_ocxl_free_xive_irq);