// SPDX-License-Identifier: GPL-2.0

#include <linux/kernel.h>
#include <linux/ioport.h>
#include <linux/bitmap.h>
#include <linux/pci.h>

#include <asm/opal.h>

#include "pci.h"

/* for pci_dev_is_added() */
#include "../../../../drivers/pci/pci.h"


static void pnv_pci_ioda_fixup_iov_resources(struct pci_dev *pdev)
{
	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
	const resource_size_t gate = phb->ioda.m64_segsize >> 2;
	struct resource *res;
	int i;
	resource_size_t size, total_vf_bar_sz;
	struct pnv_iov_data *iov;
	int mul, total_vfs;

	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
	if (!iov)
		goto truncate_iov;
	pdev->dev.archdata.iov_data = iov;

	total_vfs = pci_sriov_get_totalvfs(pdev);
	mul = phb->ioda.total_pe_num;
	total_vf_bar_sz = 0;

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &pdev->resource[i + PCI_IOV_RESOURCES];
		if (!res->flags || res->parent)
			continue;
		if (!pnv_pci_is_m64_flags(res->flags)) {
			dev_warn(&pdev->dev, "Don't support SR-IOV with non-M64 VF BAR%d: %pR\n",
				 i, res);
			goto truncate_iov;
		}

		total_vf_bar_sz += pci_iov_resource_size(pdev,
				i + PCI_IOV_RESOURCES);

		/*
		 * If bigger than a quarter of the M64 segment size, just
		 * round up to a power of two.
		 *
		 * Generally, one M64 BAR maps one IOV BAR. To avoid conflicts
		 * with other devices, the IOV BAR size is expanded to be
		 * (total_pe * VF_BAR_size). When VF_BAR_size is half of the
		 * M64 segment size, the expanded size would equal half of the
		 * whole M64 space size, which would exhaust the M64 space and
		 * limit the system's flexibility. This is a design decision
		 * to set the boundary to a quarter of the M64 segment size.
		 */
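		/*
		 * Worked example (hypothetical numbers): with a 64MB M64
		 * segment the gate is 16MB. A PF whose VF BARs total 32MB
		 * trips the gate below, so instead of expanding each IOV
		 * BAR by total_pe_num copies we round total_vfs up to a
		 * power of two and fall back to single PE mode, where each
		 * VF gets its own M64 BAR.
		 */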
		if (total_vf_bar_sz > gate) {
			mul = roundup_pow_of_two(total_vfs);
			dev_info(&pdev->dev,
				"VF BAR Total IOV size %llx > %llx, roundup to %d VFs\n",
				total_vf_bar_sz, gate, mul);
			iov->m64_single_mode = true;
			break;
		}
	}

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &pdev->resource[i + PCI_IOV_RESOURCES];
		if (!res->flags || res->parent)
			continue;

		size = pci_iov_resource_size(pdev, i + PCI_IOV_RESOURCES);
		/*
		 * On PHB3, the minimum size alignment of an M64 BAR in
		 * single mode is 32MB.
		 */
		if (iov->m64_single_mode && (size < SZ_32M))
			goto truncate_iov;

		dev_dbg(&pdev->dev, " Fixing VF BAR%d: %pR to\n", i, res);
		res->end = res->start + size * mul - 1;
		dev_dbg(&pdev->dev, "                       %pR\n", res);
		dev_info(&pdev->dev, "VF BAR%d: %pR (expanded to %d VFs for PE alignment)",
			 i, res, mul);
	}
	iov->vfs_expanded = mul;

	return;

truncate_iov:
	/* To save MMIO space, the IOV BAR is truncated. */
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &pdev->resource[i + PCI_IOV_RESOURCES];
		res->flags = 0;
		res->end = res->start - 1;
	}

	pdev->dev.archdata.iov_data = NULL;
	kfree(iov);
}

void pnv_pci_ioda_fixup_iov(struct pci_dev *pdev)
{
	if (WARN_ON(pci_dev_is_added(pdev)))
		return;

	if (pdev->is_virtfn) {
		struct pnv_ioda_pe *pe = pnv_ioda_get_pe(pdev);

		/*
		 * VF PEs are single-device PEs so their pdev pointer needs to
		 * be set. The pdev doesn't exist when the PE is allocated (in
		 * pcibios_sriov_enable()) so we fix it up here.
		 */
		pe->pdev = pdev;
		WARN_ON(!(pe->flags & PNV_IODA_PE_VF));
	} else if (pdev->is_physfn) {
		/*
		 * For PFs, adjust their allocated IOV resources to match what
		 * the PHB can support using its M64 BAR table.
		 */
		pnv_pci_ioda_fixup_iov_resources(pdev);
	}
}

resource_size_t pnv_pci_iov_resource_alignment(struct pci_dev *pdev,
					       int resno)
{
	struct pnv_phb *phb = pci_bus_to_pnvhb(pdev->bus);
	struct pnv_iov_data *iov = pnv_iov_get(pdev);
	resource_size_t align;

	/*
	 * On the PowerNV platform, the IOV BAR is mapped by an M64 BAR to
	 * enable SR-IOV. From the hardware's perspective, the range mapped
	 * by an M64 BAR must be size-aligned.
	 *
	 * When the IOV BAR is mapped with an M64 BAR in Single PE mode, this
	 * extra powernv-specific hardware restriction is gone. But if we just
	 * used the VF BAR size as the alignment, the PF BAR / VF BAR may be
	 * allocated within one segment of M64 #15, which would introduce a
	 * PE conflict between PF and VF. Based on this, the minimum alignment
	 * of an IOV BAR is m64_segsize.
	 *
	 * This function returns the total IOV BAR size if the M64 BAR is in
	 * Shared PE mode, or just the VF BAR size if not.
	 * If the M64 BAR is in Single PE mode, return the VF BAR size or the
	 * M64 segment size if the IOV BAR size is less.
	 */
	align = pci_iov_resource_size(pdev, resno);

	/*
	 * iov can be NULL if we have an SR-IOV device with an IOV BAR that
	 * can't be placed in the m64 space (i.e. the BAR is 32bit or
	 * non-prefetchable). In that case we don't allow VFs to be enabled
	 * so just return the default alignment.
	 */
	if (!iov)
		return align;
	if (!iov->vfs_expanded)
		return align;
	if (iov->m64_single_mode)
		return max(align, (resource_size_t)phb->ioda.m64_segsize);
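	/*
	 * Shared PE mode: the IOV BAR was expanded to hold total_pe_num VF
	 * copies, so ask for alignment to the full expanded size. For
	 * example (hypothetical numbers), a 1MB per-VF BAR on a PHB with
	 * 256 PEs is aligned to 256MB, so that each M64 segment lines up
	 * with exactly one VF.
	 */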
	return iov->vfs_expanded * align;
}

static int pnv_pci_vf_release_m64(struct pci_dev *pdev, u16 num_vfs)
{
	struct pnv_iov_data *iov;
	struct pnv_phb *phb;
	int i, j;
	int m64_bars;

	phb = pci_bus_to_pnvhb(pdev->bus);
	iov = pnv_iov_get(pdev);

	if (iov->m64_single_mode)
		m64_bars = num_vfs;
	else
		m64_bars = 1;

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++)
		for (j = 0; j < m64_bars; j++) {
			if (iov->m64_map[j][i] == IODA_INVALID_M64)
				continue;
			opal_pci_phb_mmio_enable(phb->opal_id,
				OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 0);
			clear_bit(iov->m64_map[j][i], &phb->ioda.m64_bar_alloc);
			iov->m64_map[j][i] = IODA_INVALID_M64;
		}

	kfree(iov->m64_map);
	return 0;
}

static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
{
	struct pnv_iov_data *iov;
	struct pnv_phb *phb;
	unsigned int win;
	struct resource *res;
	int i, j;
	int64_t rc;
	int total_vfs;
	resource_size_t size, start;
	int pe_num;
	int m64_bars;

	phb = pci_bus_to_pnvhb(pdev->bus);
	iov = pnv_iov_get(pdev);
	total_vfs = pci_sriov_get_totalvfs(pdev);

	if (iov->m64_single_mode)
		m64_bars = num_vfs;
	else
		m64_bars = 1;

	iov->m64_map = kmalloc_array(m64_bars,
				     sizeof(*iov->m64_map),
				     GFP_KERNEL);
	if (!iov->m64_map)
		return -ENOMEM;
	/* Initialize the m64_map to IODA_INVALID_M64 */
	for (i = 0; i < m64_bars; i++)
		for (j = 0; j < PCI_SRIOV_NUM_BARS; j++)
			iov->m64_map[i][j] = IODA_INVALID_M64;

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &pdev->resource[i + PCI_IOV_RESOURCES];
		if (!res->flags || !res->parent)
			continue;

		for (j = 0; j < m64_bars; j++) {
			/* Allocate a spare M64 BAR from the PHB's pool */
			do {
				win = find_next_zero_bit(&phb->ioda.m64_bar_alloc,
						phb->ioda.m64_bar_idx + 1, 0);

				if (win >= phb->ioda.m64_bar_idx + 1)
					goto m64_failed;
			} while (test_and_set_bit(win, &phb->ioda.m64_bar_alloc));

			iov->m64_map[j][i] = win;

			if (iov->m64_single_mode) {
				size = pci_iov_resource_size(pdev,
							PCI_IOV_RESOURCES + i);
				start = res->start + size * j;
			} else {
				size = resource_size(res);
				start = res->start;
			}

			/* Map the M64 here */
			if (iov->m64_single_mode) {
				pe_num = iov->pe_num_map[j];
				rc = opal_pci_map_pe_mmio_window(phb->opal_id,
						pe_num, OPAL_M64_WINDOW_TYPE,
						iov->m64_map[j][i], 0);
			}

			rc = opal_pci_set_phb_mem_window(phb->opal_id,
						OPAL_M64_WINDOW_TYPE,
						iov->m64_map[j][i],
						start,
						0, /* unused */
						size);

			if (rc != OPAL_SUCCESS) {
				dev_err(&pdev->dev, "Failed to map M64 window #%d: %lld\n",
					win, rc);
				goto m64_failed;
			}
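			/*
			 * The final argument to opal_pci_phb_mmio_enable()
			 * selects the window state. Going by the OPAL API,
			 * 1 enables the window in split (segmented) mode,
			 * 2 enables it in non-split (single PE) mode, and
			 * 0 disables it (as used in the release path above).
			 */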
			if (iov->m64_single_mode)
				rc = opal_pci_phb_mmio_enable(phb->opal_id,
					OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 2);
			else
				rc = opal_pci_phb_mmio_enable(phb->opal_id,
					OPAL_M64_WINDOW_TYPE, iov->m64_map[j][i], 1);

			if (rc != OPAL_SUCCESS) {
				dev_err(&pdev->dev, "Failed to enable M64 window #%d: %llx\n",
					win, rc);
				goto m64_failed;
			}
		}
	}
	return 0;

m64_failed:
	pnv_pci_vf_release_m64(pdev, num_vfs);
	return -EBUSY;
}

static void pnv_ioda_release_vf_PE(struct pci_dev *pdev)
{
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe, *pe_n;

	phb = pci_bus_to_pnvhb(pdev->bus);

	if (!pdev->is_physfn)
		return;

	/* FIXME: Use pnv_ioda_release_pe()? */
	list_for_each_entry_safe(pe, pe_n, &phb->ioda.pe_list, list) {
		if (pe->parent_dev != pdev)
			continue;

		pnv_pci_ioda2_release_pe_dma(pe);

		/* Remove from list */
		mutex_lock(&phb->ioda.pe_list_mutex);
		list_del(&pe->list);
		mutex_unlock(&phb->ioda.pe_list_mutex);

		pnv_ioda_deconfigure_pe(phb, pe);

		pnv_ioda_free_pe(pe);
	}
}

static int pnv_pci_vf_resource_shift(struct pci_dev *dev, int offset)
{
	struct resource *res, res2;
	struct pnv_iov_data *iov;
	resource_size_t size;
	u16 num_vfs;
	int i;

	if (!dev->is_physfn)
		return -EINVAL;
	iov = pnv_iov_get(dev);

	/*
	 * "offset" is in VFs. The M64 windows are sized so that when they
	 * are segmented, each segment is the same size as the IOV BAR.
	 * Each segment is in a separate PE, and the high order bits of the
	 * address are the PE number. Therefore, each VF's BAR is in a
	 * separate PE, and changing the IOV BAR start address changes the
	 * range of PEs the VFs are in.
	 */
	num_vfs = iov->num_vfs;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		if (!res->flags || !res->parent)
			continue;

		/*
		 * The actual IOV BAR range is determined by the start address
		 * and the size needed for num_vfs VF BARs. This check makes
		 * sure that, after shifting, the range will not overlap with
		 * another device.
		 */
		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
		res2.flags = res->flags;
		res2.start = res->start + (size * offset);
		res2.end = res2.start + (size * num_vfs) - 1;

		if (res2.end > res->end) {
			dev_err(&dev->dev, "VF BAR%d: %pR would extend past %pR (trying to enable %d VFs shifted by %d)\n",
				i, &res2, res, num_vfs, offset);
			return -EBUSY;
		}
	}

	/*
	 * Since the M64 BAR shares segments among all possible 256 PEs,
	 * we have to shift the beginning of the PF IOV BAR to make it start
	 * from the segment which belongs to the PE number assigned to the
	 * first VF. This creates a "hole" in /proc/iomem which could be used
	 * for allocating other resources, so we reserve this area below and
	 * release it when IOV is disabled.
	 */
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = &dev->resource[i + PCI_IOV_RESOURCES];
		if (!res->flags || !res->parent)
			continue;

		size = pci_iov_resource_size(dev, i + PCI_IOV_RESOURCES);
		res2 = *res;
		res->start += size * offset;

		dev_info(&dev->dev, "VF BAR%d: %pR shifted to %pR (%sabling %d VFs shifted by %d)\n",
			 i, &res2, res, (offset > 0) ? "En" : "Dis",
			 num_vfs, offset);

		if (offset < 0) {
			devm_release_resource(&dev->dev, &iov->holes[i]);
			memset(&iov->holes[i], 0, sizeof(iov->holes[i]));
		}

		pci_update_resource(dev, i + PCI_IOV_RESOURCES);

		if (offset > 0) {
			iov->holes[i].start = res2.start;
			iov->holes[i].end = res2.start + size * offset - 1;
			iov->holes[i].flags = IORESOURCE_BUS;
			iov->holes[i].name = "pnv_iov_reserved";
			devm_request_resource(&dev->dev, res->parent,
					&iov->holes[i]);
		}
	}
	return 0;
}
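/*
 * Tear down in the reverse order of pnv_pci_sriov_enable(): release the
 * VF PEs, undo the IOV BAR shift (shared PE mode only), release the M64
 * windows, and finally give the PE numbers back.
 */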
static void pnv_pci_sriov_disable(struct pci_dev *pdev)
{
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;
	struct pnv_iov_data *iov;
	u16 num_vfs, i;

	phb = pci_bus_to_pnvhb(pdev->bus);
	iov = pnv_iov_get(pdev);
	num_vfs = iov->num_vfs;

	/* Release VF PEs */
	pnv_ioda_release_vf_PE(pdev);

	if (phb->type == PNV_PHB_IODA2) {
		if (!iov->m64_single_mode)
			pnv_pci_vf_resource_shift(pdev, -*iov->pe_num_map);

		/* Release M64 windows */
		pnv_pci_vf_release_m64(pdev, num_vfs);

		/* Release PE numbers */
		if (iov->m64_single_mode) {
			for (i = 0; i < num_vfs; i++) {
				if (iov->pe_num_map[i] == IODA_INVALID_PE)
					continue;

				pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
				pnv_ioda_free_pe(pe);
			}
		} else
			bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
		/* Releasing pe_num_map */
		kfree(iov->pe_num_map);
	}
}

static void pnv_ioda_setup_vf_PE(struct pci_dev *pdev, u16 num_vfs)
{
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;
	int pe_num;
	u16 vf_index;
	struct pnv_iov_data *iov;
	struct pci_dn *pdn;

	if (!pdev->is_physfn)
		return;

	phb = pci_bus_to_pnvhb(pdev->bus);
	pdn = pci_get_pdn(pdev);
	iov = pnv_iov_get(pdev);

	/* Reserve a PE for each VF */
	for (vf_index = 0; vf_index < num_vfs; vf_index++) {
		int vf_devfn = pci_iov_virtfn_devfn(pdev, vf_index);
		int vf_bus = pci_iov_virtfn_bus(pdev, vf_index);
		struct pci_dn *vf_pdn;

		if (iov->m64_single_mode)
			pe_num = iov->pe_num_map[vf_index];
		else
			pe_num = *iov->pe_num_map + vf_index;

		pe = &phb->ioda.pe_array[pe_num];
		pe->pe_number = pe_num;
		pe->phb = phb;
		pe->flags = PNV_IODA_PE_VF;
		pe->pbus = NULL;
		pe->parent_dev = pdev;
		pe->mve_number = -1;
		pe->rid = (vf_bus << 8) | vf_devfn;

		pe_info(pe, "VF %04d:%02d:%02d.%d associated with PE#%x\n",
			pci_domain_nr(pdev->bus), pdev->bus->number,
			PCI_SLOT(vf_devfn), PCI_FUNC(vf_devfn), pe_num);

		if (pnv_ioda_configure_pe(phb, pe)) {
			/* XXX What do we do here ? */
			pnv_ioda_free_pe(pe);
			pe->pdev = NULL;
			continue;
		}

		/* Put the PE on the list */
		mutex_lock(&phb->ioda.pe_list_mutex);
		list_add_tail(&pe->list, &phb->ioda.pe_list);
		mutex_unlock(&phb->ioda.pe_list_mutex);

		/* Associate this PE with its pdn */
		list_for_each_entry(vf_pdn, &pdn->parent->child_list, list) {
			if (vf_pdn->busno == vf_bus &&
			    vf_pdn->devfn == vf_devfn) {
				vf_pdn->pe_number = pe_num;
				break;
			}
		}

		pnv_pci_ioda2_setup_dma_pe(phb, pe);
	}
}

static int pnv_pci_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	struct pnv_iov_data *iov;
	struct pnv_phb *phb;
	struct pnv_ioda_pe *pe;
	int ret;
	u16 i;

	phb = pci_bus_to_pnvhb(pdev->bus);
	iov = pnv_iov_get(pdev);

	if (phb->type == PNV_PHB_IODA2) {
		if (!iov->vfs_expanded) {
			dev_info(&pdev->dev,
				"don't support this SRIOV device with non-64bit-prefetchable IOV BAR\n");
			return -ENOSPC;
		}

		/*
		 * When the M64 BARs function in Single PE mode, the number
		 * of VFs that can be enabled must be less than the number
		 * of M64 BARs.
		 */
		if (iov->m64_single_mode && num_vfs > phb->ioda.m64_bar_idx) {
			dev_info(&pdev->dev, "Not enough M64 BAR for VFs\n");
			return -EBUSY;
		}
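		/*
		 * pe_num_map has two shapes: in single PE mode it is an
		 * array holding one PE number per VF, while in shared mode
		 * it is a single entry holding the base PE number, with VF n
		 * using PE (*pe_num_map + n).
		 */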
		/* Allocating pe_num_map */
		if (iov->m64_single_mode)
			iov->pe_num_map = kmalloc_array(num_vfs,
							sizeof(*iov->pe_num_map),
							GFP_KERNEL);
		else
			iov->pe_num_map = kmalloc(sizeof(*iov->pe_num_map), GFP_KERNEL);

		if (!iov->pe_num_map)
			return -ENOMEM;

		if (iov->m64_single_mode)
			for (i = 0; i < num_vfs; i++)
				iov->pe_num_map[i] = IODA_INVALID_PE;

		/* Calculate available PEs for the required VFs */
		if (iov->m64_single_mode) {
			for (i = 0; i < num_vfs; i++) {
				pe = pnv_ioda_alloc_pe(phb);
				if (!pe) {
					ret = -EBUSY;
					goto m64_failed;
				}

				iov->pe_num_map[i] = pe->pe_number;
			}
		} else {
			mutex_lock(&phb->ioda.pe_alloc_mutex);
			*iov->pe_num_map = bitmap_find_next_zero_area(
				phb->ioda.pe_alloc, phb->ioda.total_pe_num,
				0, num_vfs, 0);
			if (*iov->pe_num_map >= phb->ioda.total_pe_num) {
				mutex_unlock(&phb->ioda.pe_alloc_mutex);
				dev_info(&pdev->dev, "Failed to enable VF%d\n", num_vfs);
				kfree(iov->pe_num_map);
				return -EBUSY;
			}
			bitmap_set(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);
			mutex_unlock(&phb->ioda.pe_alloc_mutex);
		}
		iov->num_vfs = num_vfs;

		/* Assign M64 windows accordingly */
		ret = pnv_pci_vf_assign_m64(pdev, num_vfs);
		if (ret) {
			dev_info(&pdev->dev, "Not enough M64 window resources\n");
			goto m64_failed;
		}

		/*
		 * When using one M64 BAR to map one IOV BAR, we need to shift
		 * the IOV BAR according to the PE# allocated to the VFs.
		 * Otherwise, the PE# for the VF will conflict with others.
		 */
		if (!iov->m64_single_mode) {
			ret = pnv_pci_vf_resource_shift(pdev, *iov->pe_num_map);
			if (ret)
				goto m64_failed;
		}
	}

	/* Setup VF PEs */
	pnv_ioda_setup_vf_PE(pdev, num_vfs);

	return 0;

m64_failed:
	if (iov->m64_single_mode) {
		for (i = 0; i < num_vfs; i++) {
			if (iov->pe_num_map[i] == IODA_INVALID_PE)
				continue;

			pe = &phb->ioda.pe_array[iov->pe_num_map[i]];
			pnv_ioda_free_pe(pe);
		}
	} else
		bitmap_clear(phb->ioda.pe_alloc, *iov->pe_num_map, num_vfs);

	/* Releasing pe_num_map */
	kfree(iov->pe_num_map);

	return ret;
}

int pnv_pcibios_sriov_disable(struct pci_dev *pdev)
{
	pnv_pci_sriov_disable(pdev);

	/* Release PCI data */
	remove_sriov_vf_pdns(pdev);
	return 0;
}

int pnv_pcibios_sriov_enable(struct pci_dev *pdev, u16 num_vfs)
{
	/* Allocate PCI data */
	add_sriov_vf_pdns(pdev);

	return pnv_pci_sriov_enable(pdev, num_vfs);
}
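/*
 * Note: pnv_pcibios_sriov_enable()/_disable() are expected to be wired up
 * to the generic powerpc hooks (ppc_md.pcibios_sriov_enable/disable) during
 * PHB setup, so they run when the PCI core enables or disables VFs, e.g.
 * via the sriov_numvfs sysfs attribute or a driver calling
 * pci_enable_sriov().
 */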