/*
 * drivers/pci/iov.c
 *
 * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com>
 *
 * PCI Express I/O Virtualization (IOV) support.
 *   Single Root IOV 1.0
 */

#include <linux/pci.h>
#include <linux/mutex.h>
#include <linux/string.h>
#include <linux/delay.h>
#include "pci.h"

#define VIRTFN_ID_LEN	16

/*
 * A VF's routing ID is the PF's routing ID plus the First VF Offset plus
 * the VF Stride times the VF index; virtfn_bus() and virtfn_devfn() split
 * that sum into a bus number and a devfn.
 */
static inline u8 virtfn_bus(struct pci_dev *dev, int id)
{
	return dev->bus->number + ((dev->devfn + dev->sriov->offset +
				    dev->sriov->stride * id) >> 8);
}

static inline u8 virtfn_devfn(struct pci_dev *dev, int id)
{
	return (dev->devfn + dev->sriov->offset +
		dev->sriov->stride * id) & 0xff;
}

static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr)
{
	int rc;
	struct pci_bus *child;

	if (bus->number == busnr)
		return bus;

	child = pci_find_bus(pci_domain_nr(bus), busnr);
	if (child)
		return child;

	child = pci_add_new_bus(bus, NULL, busnr);
	if (!child)
		return NULL;

	child->subordinate = busnr;
	child->dev.parent = bus->bridge;
	rc = pci_bus_add_child(child);
	if (rc) {
		pci_remove_bus(child);
		return NULL;
	}

	return child;
}

static void virtfn_remove_bus(struct pci_bus *bus, int busnr)
{
	struct pci_bus *child;

	if (bus->number == busnr)
		return;

	child = pci_find_bus(pci_domain_nr(bus), busnr);
	BUG_ON(!child);

	if (list_empty(&child->devices))
		pci_remove_bus(child);
}

static int virtfn_add(struct pci_dev *dev, int id, int reset)
{
	int i;
	int rc;
	u64 size;
	char buf[VIRTFN_ID_LEN];
	struct pci_dev *virtfn;
	struct resource *res;
	struct pci_sriov *iov = dev->sriov;

	virtfn = alloc_pci_dev();
	if (!virtfn)
		return -ENOMEM;

	mutex_lock(&iov->dev->sriov->lock);
	virtfn->bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id));
	if (!virtfn->bus) {
		kfree(virtfn);
		mutex_unlock(&iov->dev->sriov->lock);
		return -ENOMEM;
	}
	virtfn->devfn = virtfn_devfn(dev, id);
	virtfn->vendor = dev->vendor;
	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device);
	pci_setup_device(virtfn);
	virtfn->dev.parent = dev->dev.parent;

	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = dev->resource + PCI_IOV_RESOURCES + i;
		if (!res->parent)
			continue;
		virtfn->resource[i].name = pci_name(virtfn);
		virtfn->resource[i].flags = res->flags;
		/* Each VF gets an equal slice of the PF's VF BAR space. */
		size = resource_size(res);
		do_div(size, iov->total);
		virtfn->resource[i].start = res->start + size * id;
		virtfn->resource[i].end = virtfn->resource[i].start + size - 1;
		rc = request_resource(res, &virtfn->resource[i]);
		BUG_ON(rc);
	}

	if (reset)
		pci_execute_reset_function(virtfn);

	pci_device_add(virtfn, virtfn->bus);
	mutex_unlock(&iov->dev->sriov->lock);

	virtfn->physfn = pci_dev_get(dev);
	virtfn->is_virtfn = 1;

	rc = pci_bus_add_device(virtfn);
	if (rc)
		goto failed1;
	sprintf(buf, "virtfn%u", id);
	rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf);
	if (rc)
		goto failed1;
	rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn");
	if (rc)
		goto failed2;

	kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE);

	return 0;

failed2:
	sysfs_remove_link(&dev->dev.kobj, buf);
failed1:
	pci_dev_put(dev);
	mutex_lock(&iov->dev->sriov->lock);
	pci_remove_bus_device(virtfn);
	virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
	mutex_unlock(&iov->dev->sriov->lock);

	return rc;
}

static void virtfn_remove(struct pci_dev *dev, int id, int reset)
{
	char buf[VIRTFN_ID_LEN];
	struct pci_bus *bus;
	struct pci_dev *virtfn;
	struct pci_sriov *iov = dev->sriov;

	bus = pci_find_bus(pci_domain_nr(dev->bus), virtfn_bus(dev, id));
	if (!bus)
		return;

	virtfn = pci_get_slot(bus, virtfn_devfn(dev, id));
	if (!virtfn)
		return;

	pci_dev_put(virtfn);

	if (reset) {
		device_release_driver(&virtfn->dev);
		pci_execute_reset_function(virtfn);
	}

	sprintf(buf, "virtfn%u", id);
	sysfs_remove_link(&dev->dev.kobj, buf);
	sysfs_remove_link(&virtfn->dev.kobj, "physfn");

	mutex_lock(&iov->dev->sriov->lock);
	pci_remove_bus_device(virtfn);
	virtfn_remove_bus(dev->bus, virtfn_bus(dev, id));
	mutex_unlock(&iov->dev->sriov->lock);

	pci_dev_put(dev);
}

static int sriov_migration(struct pci_dev *dev)
{
	u16 status;
	struct pci_sriov *iov = dev->sriov;

	if (!iov->nr_virtfn)
		return 0;

	if (!(iov->cap & PCI_SRIOV_CAP_VFM))
		return 0;

	pci_read_config_word(dev, iov->pos + PCI_SRIOV_STATUS, &status);
	if (!(status & PCI_SRIOV_STATUS_VFM))
		return 0;

	schedule_work(&iov->mtask);

	return 1;
}

static void sriov_migration_task(struct work_struct *work)
{
	int i;
	u8 state;
	u16 status;
	struct pci_sriov *iov = container_of(work, struct pci_sriov, mtask);

	/*
	 * Walk the VF Migration State Array: make migrate-in VFs available
	 * (MI -> AV) and add them, and remove migrate-out VFs before
	 * releasing them (MO -> UA), re-adding any that come back as
	 * available.
	 */
	for (i = iov->initial; i < iov->nr_virtfn; i++) {
		state = readb(iov->mstate + i);
		if (state == PCI_SRIOV_VFM_MI) {
			writeb(PCI_SRIOV_VFM_AV, iov->mstate + i);
			state = readb(iov->mstate + i);
			if (state == PCI_SRIOV_VFM_AV)
				virtfn_add(iov->self, i, 1);
		} else if (state == PCI_SRIOV_VFM_MO) {
			virtfn_remove(iov->self, i, 1);
			writeb(PCI_SRIOV_VFM_UA, iov->mstate + i);
			state = readb(iov->mstate + i);
			if (state == PCI_SRIOV_VFM_AV)
				virtfn_add(iov->self, i, 0);
		}
	}

	pci_read_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, &status);
	status &= ~PCI_SRIOV_STATUS_VFM;
	pci_write_config_word(iov->self, iov->pos + PCI_SRIOV_STATUS, status);
}

static int sriov_enable_migration(struct pci_dev *dev, int nr_virtfn)
{
	int bir;
	u32 table;
	resource_size_t pa;
	struct pci_sriov *iov = dev->sriov;

	if (nr_virtfn <= iov->initial)
		return 0;

	pci_read_config_dword(dev, iov->pos + PCI_SRIOV_VFM, &table);
	bir = PCI_SRIOV_VFM_BIR(table);
	if (bir > PCI_STD_RESOURCE_END)
		return -EIO;

	table = PCI_SRIOV_VFM_OFFSET(table);
	if (table + nr_virtfn > pci_resource_len(dev, bir))
		return -EIO;

	pa = pci_resource_start(dev, bir) + table;
	iov->mstate = ioremap(pa, nr_virtfn);
	if (!iov->mstate)
		return -ENOMEM;

	INIT_WORK(&iov->mtask, sriov_migration_task);

	iov->ctrl |= PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR;
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);

	return 0;
}

static void sriov_disable_migration(struct pci_dev *dev)
{
	struct pci_sriov *iov = dev->sriov;

	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFM | PCI_SRIOV_CTRL_INTR);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);

	cancel_work_sync(&iov->mtask);
	iounmap(iov->mstate);
}

static int sriov_enable(struct pci_dev *dev, int nr_virtfn)
{
	int rc;
	int i, j;
	int nres;
	u16 offset, stride, initial;
	struct resource *res;
	struct pci_dev *pdev;
	struct pci_sriov *iov = dev->sriov;

	if (!nr_virtfn)
		return 0;

	if (iov->nr_virtfn)
		return -EINVAL;

	pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial);
	if (initial > iov->total ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total)))
		return -EIO;

	if (nr_virtfn < 0 || nr_virtfn > iov->total ||
	    (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial)))
		return -EINVAL;

	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn);
	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset);
	pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride);
	if (!offset || (nr_virtfn > 1 && !stride))
		return -EIO;

	nres = 0;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = dev->resource + PCI_IOV_RESOURCES + i;
		if (res->parent)
			nres++;
	}
	if (nres != iov->nres) {
		dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n");
		return -ENOMEM;
	}

	iov->offset = offset;
	iov->stride = stride;

	if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->subordinate) {
		dev_err(&dev->dev, "SR-IOV: bus number out of range\n");
		return -ENOMEM;
	}

	if (iov->link != dev->devfn) {
		pdev = pci_get_slot(dev->bus, iov->link);
		if (!pdev)
			return -ENODEV;

		pci_dev_put(pdev);

		if (!pdev->is_physfn)
			return -ENODEV;

		rc = sysfs_create_link(&dev->dev.kobj,
				       &pdev->dev.kobj, "dep_link");
		if (rc)
			return rc;
	}

	iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE;
	pci_block_user_cfg_access(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	msleep(100);
	pci_unblock_user_cfg_access(dev);

	iov->initial = initial;
	if (nr_virtfn < initial)
		initial = nr_virtfn;

	for (i = 0; i < initial; i++) {
		rc = virtfn_add(dev, i, 0);
		if (rc)
			goto failed;
	}

	if (iov->cap & PCI_SRIOV_CAP_VFM) {
		rc = sriov_enable_migration(dev, nr_virtfn);
		if (rc)
			goto failed;
	}

	kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE);
	iov->nr_virtfn = nr_virtfn;

	return 0;

failed:
	for (j = 0; j < i; j++)
		virtfn_remove(dev, j, 0);

	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
	pci_block_user_cfg_access(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	ssleep(1);
	pci_unblock_user_cfg_access(dev);

	if (iov->link != dev->devfn)
		sysfs_remove_link(&dev->dev.kobj, "dep_link");

	return rc;
}

static void sriov_disable(struct pci_dev *dev)
{
	int i;
	struct pci_sriov *iov = dev->sriov;

	if (!iov->nr_virtfn)
		return;

	if (iov->cap & PCI_SRIOV_CAP_VFM)
		sriov_disable_migration(dev);

	for (i = 0; i < iov->nr_virtfn; i++)
		virtfn_remove(dev, i, 0);

	iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE);
	pci_block_user_cfg_access(dev);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	ssleep(1);
	pci_unblock_user_cfg_access(dev);

	if (iov->link != dev->devfn)
		sysfs_remove_link(&dev->dev.kobj, "dep_link");

	iov->nr_virtfn = 0;
}

static int sriov_init(struct pci_dev *dev, int pos)
{
	int i;
	int rc;
	int nres;
	u32 pgsz;
	u16 ctrl, total, offset, stride;
	struct pci_sriov *iov;
	struct resource *res;
	struct pci_dev *pdev;
	if (dev->pcie_type != PCI_EXP_TYPE_RC_END &&
	    dev->pcie_type != PCI_EXP_TYPE_ENDPOINT)
		return -ENODEV;

	pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl);
	if (ctrl & PCI_SRIOV_CTRL_VFE) {
		pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0);
		ssleep(1);
	}

	pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total);
	if (!total)
		return 0;

	ctrl = 0;
	/* If another PF on this bus is already set up, share its state. */
	list_for_each_entry(pdev, &dev->bus->devices, bus_list)
		if (pdev->is_physfn)
			goto found;

	pdev = NULL;
	if (pci_ari_enabled(dev->bus))
		ctrl |= PCI_SRIOV_CTRL_ARI;

found:
	pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl);
	pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, total);
	pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset);
	pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride);
	if (!offset || (total > 1 && !stride))
		return -EIO;

	pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz);
	/* Use the smallest supported page size that covers PAGE_SIZE. */
	i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0;
	pgsz &= ~((1 << i) - 1);
	if (!pgsz)
		return -EIO;

	pgsz &= ~(pgsz - 1);
	pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz);

	nres = 0;
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = dev->resource + PCI_IOV_RESOURCES + i;
		i += __pci_read_base(dev, pci_bar_unknown, res,
				     pos + PCI_SRIOV_BAR + i * 4);
		if (!res->flags)
			continue;
		if (resource_size(res) & (PAGE_SIZE - 1)) {
			rc = -EIO;
			goto failed;
		}
		res->end = res->start + resource_size(res) * total - 1;
		nres++;
	}

	iov = kzalloc(sizeof(*iov), GFP_KERNEL);
	if (!iov) {
		rc = -ENOMEM;
		goto failed;
	}

	iov->pos = pos;
	iov->nres = nres;
	iov->ctrl = ctrl;
	iov->total = total;
	iov->offset = offset;
	iov->stride = stride;
	iov->pgsz = pgsz;
	iov->self = dev;
	pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap);
	pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link);

	if (pdev)
		iov->dev = pci_dev_get(pdev);
	else {
		iov->dev = dev;
		mutex_init(&iov->lock);
	}

	dev->sriov = iov;
	dev->is_physfn = 1;

	return 0;

failed:
	for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) {
		res = dev->resource + PCI_IOV_RESOURCES + i;
		res->flags = 0;
	}

	return rc;
}

static void sriov_release(struct pci_dev *dev)
{
	BUG_ON(dev->sriov->nr_virtfn);

	if (dev == dev->sriov->dev)
		mutex_destroy(&dev->sriov->lock);
	else
		pci_dev_put(dev->sriov->dev);

	kfree(dev->sriov);
	dev->sriov = NULL;
}

static void sriov_restore_state(struct pci_dev *dev)
{
	int i;
	u16 ctrl;
	struct pci_sriov *iov = dev->sriov;

	pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl);
	if (ctrl & PCI_SRIOV_CTRL_VFE)
		return;

	for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++)
		pci_update_resource(dev, i);

	pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->nr_virtfn);
	pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl);
	if (iov->ctrl & PCI_SRIOV_CTRL_VFE)
		msleep(100);
}

/**
 * pci_iov_init - initialize the IOV capability
 * @dev: the PCI device
 *
 * Returns 0 on success, or negative on failure.
 */
int pci_iov_init(struct pci_dev *dev)
{
	int pos;

	if (!dev->is_pcie)
		return -ENODEV;

	pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV);
	if (pos)
		return sriov_init(dev, pos);

	return -ENODEV;
}

/**
 * pci_iov_release - release resources used by the IOV capability
 * @dev: the PCI device
 */
void pci_iov_release(struct pci_dev *dev)
{
	if (dev->is_physfn)
		sriov_release(dev);
}

/**
 * pci_iov_resource_bar - get position of the SR-IOV BAR
 * @dev: the PCI device
 * @resno: the resource number
 * @type: the BAR type to be filled in
 *
 * Returns the position of the BAR encapsulated in the SR-IOV capability.
 */
int pci_iov_resource_bar(struct pci_dev *dev, int resno,
			 enum pci_bar_type *type)
{
	if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END)
		return 0;

	BUG_ON(!dev->is_physfn);

	*type = pci_bar_unknown;

	return dev->sriov->pos + PCI_SRIOV_BAR +
		4 * (resno - PCI_IOV_RESOURCES);
}

/**
 * pci_restore_iov_state - restore the state of the IOV capability
 * @dev: the PCI device
 */
void pci_restore_iov_state(struct pci_dev *dev)
{
	if (dev->is_physfn)
		sriov_restore_state(dev);
}

/**
 * pci_iov_bus_range - find the bus range used by Virtual Functions
 * @bus: the PCI bus
 *
 * Returns the maximum number of buses (excluding the current one) used by
 * Virtual Functions.
 */
int pci_iov_bus_range(struct pci_bus *bus)
{
	int max = 0;
	u8 busnr;
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		if (!dev->is_physfn)
			continue;
		busnr = virtfn_bus(dev, dev->sriov->total - 1);
		if (busnr > max)
			max = busnr;
	}

	return max ? max - bus->number : 0;
}

/**
 * pci_enable_sriov - enable the SR-IOV capability
 * @dev: the PCI device
 * @nr_virtfn: number of virtual functions to enable
 *
 * Returns 0 on success, or negative on failure.
 */
int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn)
{
	might_sleep();

	if (!dev->is_physfn)
		return -ENODEV;

	return sriov_enable(dev, nr_virtfn);
}
EXPORT_SYMBOL_GPL(pci_enable_sriov);

/**
 * pci_disable_sriov - disable the SR-IOV capability
 * @dev: the PCI device
 */
void pci_disable_sriov(struct pci_dev *dev)
{
	might_sleep();

	if (!dev->is_physfn)
		return;

	sriov_disable(dev);
}
EXPORT_SYMBOL_GPL(pci_disable_sriov);

/**
 * pci_sriov_migration - notify the SR-IOV core of Virtual Function Migration
 * @dev: the PCI device
 *
 * Returns IRQ_HANDLED if the IRQ is handled, or IRQ_NONE if not.
 *
 * The Physical Function driver is responsible for registering an IRQ handler
 * using the VF Migration Interrupt Message Number, and for calling this
 * function when the interrupt is generated by the hardware.
 */
irqreturn_t pci_sriov_migration(struct pci_dev *dev)
{
	if (!dev->is_physfn)
		return IRQ_NONE;

	return sriov_migration(dev) ? IRQ_HANDLED : IRQ_NONE;
}
EXPORT_SYMBOL_GPL(pci_sriov_migration);
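
/*
 * Illustrative sketch (not compiled): one way a Physical Function driver
 * might use the API exported above.  The foo_* names, the way the VF
 * Migration Interrupt vector is obtained, and the error handling are
 * assumptions for illustration only, not part of this file's API.
 */
#if 0
static irqreturn_t foo_vf_migration_irq(int irq, void *data)
{
	struct pci_dev *pf = data;

	/* Hand the event to the SR-IOV core; it schedules the real work. */
	return pci_sriov_migration(pf);
}

static int foo_enable_vfs(struct pci_dev *pf, int nr_vf, int migration_irq)
{
	int rc;

	rc = pci_enable_sriov(pf, nr_vf);
	if (rc)
		return rc;

	/*
	 * 'migration_irq' is assumed to be the vector matching the VF
	 * Migration Interrupt Message Number (MSI-X setup not shown);
	 * a real driver would also need <linux/interrupt.h>.
	 */
	rc = request_irq(migration_irq, foo_vf_migration_irq, 0,
			 "foo-vf-migration", pf);
	if (rc)
		pci_disable_sriov(pf);

	return rc;
}

static void foo_disable_vfs(struct pci_dev *pf, int migration_irq)
{
	free_irq(migration_irq, pf);
	pci_disable_sriov(pf);
}
#endif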