1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * s390 kvm PCI passthrough support 4 * 5 * Copyright IBM Corp. 2022 6 * 7 * Author(s): Matthew Rosato <mjrosato@linux.ibm.com> 8 */ 9 10 #include <linux/kvm_host.h> 11 #include <linux/pci.h> 12 #include <asm/pci.h> 13 #include <asm/pci_insn.h> 14 #include <asm/pci_io.h> 15 #include <asm/sclp.h> 16 #include "pci.h" 17 #include "kvm-s390.h" 18 19 struct zpci_aift *aift; 20 21 static inline int __set_irq_noiib(u16 ctl, u8 isc) 22 { 23 union zpci_sic_iib iib = {{0}}; 24 25 return zpci_set_irq_ctrl(ctl, isc, &iib); 26 } 27 28 void kvm_s390_pci_aen_exit(void) 29 { 30 unsigned long flags; 31 struct kvm_zdev **gait_kzdev; 32 33 lockdep_assert_held(&aift->aift_lock); 34 35 /* 36 * Contents of the aipb remain registered for the life of the host 37 * kernel, the information preserved in zpci_aipb and zpci_aif_sbv 38 * in case we insert the KVM module again later. Clear the AIFT 39 * information and free anything not registered with underlying 40 * firmware. 41 */ 42 spin_lock_irqsave(&aift->gait_lock, flags); 43 gait_kzdev = aift->kzdev; 44 aift->gait = NULL; 45 aift->sbv = NULL; 46 aift->kzdev = NULL; 47 spin_unlock_irqrestore(&aift->gait_lock, flags); 48 49 kfree(gait_kzdev); 50 } 51 52 static int zpci_setup_aipb(u8 nisc) 53 { 54 struct page *page; 55 int size, rc; 56 57 zpci_aipb = kzalloc(sizeof(union zpci_sic_iib), GFP_KERNEL); 58 if (!zpci_aipb) 59 return -ENOMEM; 60 61 aift->sbv = airq_iv_create(ZPCI_NR_DEVICES, AIRQ_IV_ALLOC, 0); 62 if (!aift->sbv) { 63 rc = -ENOMEM; 64 goto free_aipb; 65 } 66 zpci_aif_sbv = aift->sbv; 67 size = get_order(PAGE_ALIGN(ZPCI_NR_DEVICES * 68 sizeof(struct zpci_gaite))); 69 page = alloc_pages(GFP_KERNEL | __GFP_ZERO, size); 70 if (!page) { 71 rc = -ENOMEM; 72 goto free_sbv; 73 } 74 aift->gait = (struct zpci_gaite *)page_to_phys(page); 75 76 zpci_aipb->aipb.faisb = virt_to_phys(aift->sbv->vector); 77 zpci_aipb->aipb.gait = virt_to_phys(aift->gait); 78 zpci_aipb->aipb.afi = nisc; 79 zpci_aipb->aipb.faal = ZPCI_NR_DEVICES; 80 81 /* Setup Adapter Event Notification Interpretation */ 82 if (zpci_set_irq_ctrl(SIC_SET_AENI_CONTROLS, 0, zpci_aipb)) { 83 rc = -EIO; 84 goto free_gait; 85 } 86 87 return 0; 88 89 free_gait: 90 free_pages((unsigned long)aift->gait, size); 91 free_sbv: 92 airq_iv_release(aift->sbv); 93 zpci_aif_sbv = NULL; 94 free_aipb: 95 kfree(zpci_aipb); 96 zpci_aipb = NULL; 97 98 return rc; 99 } 100 101 static int zpci_reset_aipb(u8 nisc) 102 { 103 /* 104 * AEN registration can only happen once per system boot. If 105 * an aipb already exists then AEN was already registered and 106 * we can re-use the aipb contents. This can only happen if 107 * the KVM module was removed and re-inserted. However, we must 108 * ensure that the same forwarding ISC is used as this is assigned 109 * during KVM module load. 110 */ 111 if (zpci_aipb->aipb.afi != nisc) 112 return -EINVAL; 113 114 aift->sbv = zpci_aif_sbv; 115 aift->gait = (struct zpci_gaite *)zpci_aipb->aipb.gait; 116 117 return 0; 118 } 119 120 int kvm_s390_pci_aen_init(u8 nisc) 121 { 122 int rc = 0; 123 124 /* If already enabled for AEN, bail out now */ 125 if (aift->gait || aift->sbv) 126 return -EPERM; 127 128 mutex_lock(&aift->aift_lock); 129 aift->kzdev = kcalloc(ZPCI_NR_DEVICES, sizeof(struct kvm_zdev), 130 GFP_KERNEL); 131 if (!aift->kzdev) { 132 rc = -ENOMEM; 133 goto unlock; 134 } 135 136 if (!zpci_aipb) 137 rc = zpci_setup_aipb(nisc); 138 else 139 rc = zpci_reset_aipb(nisc); 140 if (rc) 141 goto free_zdev; 142 143 /* Enable floating IRQs */ 144 if (__set_irq_noiib(SIC_IRQ_MODE_SINGLE, nisc)) { 145 rc = -EIO; 146 kvm_s390_pci_aen_exit(); 147 } 148 149 goto unlock; 150 151 free_zdev: 152 kfree(aift->kzdev); 153 unlock: 154 mutex_unlock(&aift->aift_lock); 155 return rc; 156 } 157 158 /* Modify PCI: Register floating adapter interruption forwarding */ 159 static int kvm_zpci_set_airq(struct zpci_dev *zdev) 160 { 161 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_REG_INT); 162 struct zpci_fib fib = {}; 163 u8 status; 164 165 fib.fmt0.isc = zdev->kzdev->fib.fmt0.isc; 166 fib.fmt0.sum = 1; /* enable summary notifications */ 167 fib.fmt0.noi = airq_iv_end(zdev->aibv); 168 fib.fmt0.aibv = virt_to_phys(zdev->aibv->vector); 169 fib.fmt0.aibvo = 0; 170 fib.fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8); 171 fib.fmt0.aisbo = zdev->aisb & 63; 172 fib.gd = zdev->gisa; 173 174 return zpci_mod_fc(req, &fib, &status) ? -EIO : 0; 175 } 176 177 /* Modify PCI: Unregister floating adapter interruption forwarding */ 178 static int kvm_zpci_clear_airq(struct zpci_dev *zdev) 179 { 180 u64 req = ZPCI_CREATE_REQ(zdev->fh, 0, ZPCI_MOD_FC_DEREG_INT); 181 struct zpci_fib fib = {}; 182 u8 cc, status; 183 184 fib.gd = zdev->gisa; 185 186 cc = zpci_mod_fc(req, &fib, &status); 187 if (cc == 3 || (cc == 1 && status == 24)) 188 /* Function already gone or IRQs already deregistered. */ 189 cc = 0; 190 191 return cc ? -EIO : 0; 192 } 193 194 static inline void unaccount_mem(unsigned long nr_pages) 195 { 196 struct user_struct *user = get_uid(current_user()); 197 198 if (user) 199 atomic_long_sub(nr_pages, &user->locked_vm); 200 if (current->mm) 201 atomic64_sub(nr_pages, ¤t->mm->pinned_vm); 202 } 203 204 static inline int account_mem(unsigned long nr_pages) 205 { 206 struct user_struct *user = get_uid(current_user()); 207 unsigned long page_limit, cur_pages, new_pages; 208 209 page_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; 210 211 do { 212 cur_pages = atomic_long_read(&user->locked_vm); 213 new_pages = cur_pages + nr_pages; 214 if (new_pages > page_limit) 215 return -ENOMEM; 216 } while (atomic_long_cmpxchg(&user->locked_vm, cur_pages, 217 new_pages) != cur_pages); 218 219 atomic64_add(nr_pages, ¤t->mm->pinned_vm); 220 221 return 0; 222 } 223 224 static int kvm_s390_pci_aif_enable(struct zpci_dev *zdev, struct zpci_fib *fib, 225 bool assist) 226 { 227 struct page *pages[1], *aibv_page, *aisb_page = NULL; 228 unsigned int msi_vecs, idx; 229 struct zpci_gaite *gaite; 230 unsigned long hva, bit; 231 struct kvm *kvm; 232 phys_addr_t gaddr; 233 int rc = 0, gisc, npages, pcount = 0; 234 235 /* 236 * Interrupt forwarding is only applicable if the device is already 237 * enabled for interpretation 238 */ 239 if (zdev->gisa == 0) 240 return -EINVAL; 241 242 kvm = zdev->kzdev->kvm; 243 msi_vecs = min_t(unsigned int, fib->fmt0.noi, zdev->max_msi); 244 245 /* Get the associated forwarding ISC - if invalid, return the error */ 246 gisc = kvm_s390_gisc_register(kvm, fib->fmt0.isc); 247 if (gisc < 0) 248 return gisc; 249 250 /* Replace AIBV address */ 251 idx = srcu_read_lock(&kvm->srcu); 252 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aibv)); 253 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, pages); 254 srcu_read_unlock(&kvm->srcu, idx); 255 if (npages < 1) { 256 rc = -EIO; 257 goto out; 258 } 259 aibv_page = pages[0]; 260 pcount++; 261 gaddr = page_to_phys(aibv_page) + (fib->fmt0.aibv & ~PAGE_MASK); 262 fib->fmt0.aibv = gaddr; 263 264 /* Pin the guest AISB if one was specified */ 265 if (fib->fmt0.sum == 1) { 266 idx = srcu_read_lock(&kvm->srcu); 267 hva = gfn_to_hva(kvm, gpa_to_gfn((gpa_t)fib->fmt0.aisb)); 268 npages = pin_user_pages_fast(hva, 1, FOLL_WRITE | FOLL_LONGTERM, 269 pages); 270 srcu_read_unlock(&kvm->srcu, idx); 271 if (npages < 1) { 272 rc = -EIO; 273 goto unpin1; 274 } 275 aisb_page = pages[0]; 276 pcount++; 277 } 278 279 /* Account for pinned pages, roll back on failure */ 280 if (account_mem(pcount)) 281 goto unpin2; 282 283 /* AISB must be allocated before we can fill in GAITE */ 284 mutex_lock(&aift->aift_lock); 285 bit = airq_iv_alloc_bit(aift->sbv); 286 if (bit == -1UL) 287 goto unlock; 288 zdev->aisb = bit; /* store the summary bit number */ 289 zdev->aibv = airq_iv_create(msi_vecs, AIRQ_IV_DATA | 290 AIRQ_IV_BITLOCK | 291 AIRQ_IV_GUESTVEC, 292 phys_to_virt(fib->fmt0.aibv)); 293 294 spin_lock_irq(&aift->gait_lock); 295 gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb * 296 sizeof(struct zpci_gaite)); 297 298 /* If assist not requested, host will get all alerts */ 299 if (assist) 300 gaite->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); 301 else 302 gaite->gisa = 0; 303 304 gaite->gisc = fib->fmt0.isc; 305 gaite->count++; 306 gaite->aisbo = fib->fmt0.aisbo; 307 gaite->aisb = virt_to_phys(page_address(aisb_page) + (fib->fmt0.aisb & 308 ~PAGE_MASK)); 309 aift->kzdev[zdev->aisb] = zdev->kzdev; 310 spin_unlock_irq(&aift->gait_lock); 311 312 /* Update guest FIB for re-issue */ 313 fib->fmt0.aisbo = zdev->aisb & 63; 314 fib->fmt0.aisb = virt_to_phys(aift->sbv->vector + (zdev->aisb / 64) * 8); 315 fib->fmt0.isc = gisc; 316 317 /* Save some guest fib values in the host for later use */ 318 zdev->kzdev->fib.fmt0.isc = fib->fmt0.isc; 319 zdev->kzdev->fib.fmt0.aibv = fib->fmt0.aibv; 320 mutex_unlock(&aift->aift_lock); 321 322 /* Issue the clp to setup the irq now */ 323 rc = kvm_zpci_set_airq(zdev); 324 return rc; 325 326 unlock: 327 mutex_unlock(&aift->aift_lock); 328 unpin2: 329 if (fib->fmt0.sum == 1) 330 unpin_user_page(aisb_page); 331 unpin1: 332 unpin_user_page(aibv_page); 333 out: 334 return rc; 335 } 336 337 static int kvm_s390_pci_aif_disable(struct zpci_dev *zdev, bool force) 338 { 339 struct kvm_zdev *kzdev = zdev->kzdev; 340 struct zpci_gaite *gaite; 341 struct page *vpage = NULL, *spage = NULL; 342 int rc, pcount = 0; 343 u8 isc; 344 345 if (zdev->gisa == 0) 346 return -EINVAL; 347 348 mutex_lock(&aift->aift_lock); 349 350 /* 351 * If the clear fails due to an error, leave now unless we know this 352 * device is about to go away (force) -- In that case clear the GAITE 353 * regardless. 354 */ 355 rc = kvm_zpci_clear_airq(zdev); 356 if (rc && !force) 357 goto out; 358 359 if (zdev->kzdev->fib.fmt0.aibv == 0) 360 goto out; 361 spin_lock_irq(&aift->gait_lock); 362 gaite = (struct zpci_gaite *)aift->gait + (zdev->aisb * 363 sizeof(struct zpci_gaite)); 364 isc = gaite->gisc; 365 gaite->count--; 366 if (gaite->count == 0) { 367 /* Release guest AIBV and AISB */ 368 vpage = phys_to_page(kzdev->fib.fmt0.aibv); 369 if (gaite->aisb != 0) 370 spage = phys_to_page(gaite->aisb); 371 /* Clear the GAIT entry */ 372 gaite->aisb = 0; 373 gaite->gisc = 0; 374 gaite->aisbo = 0; 375 gaite->gisa = 0; 376 aift->kzdev[zdev->aisb] = 0; 377 /* Clear zdev info */ 378 airq_iv_free_bit(aift->sbv, zdev->aisb); 379 airq_iv_release(zdev->aibv); 380 zdev->aisb = 0; 381 zdev->aibv = NULL; 382 } 383 spin_unlock_irq(&aift->gait_lock); 384 kvm_s390_gisc_unregister(kzdev->kvm, isc); 385 kzdev->fib.fmt0.isc = 0; 386 kzdev->fib.fmt0.aibv = 0; 387 388 if (vpage) { 389 unpin_user_page(vpage); 390 pcount++; 391 } 392 if (spage) { 393 unpin_user_page(spage); 394 pcount++; 395 } 396 if (pcount > 0) 397 unaccount_mem(pcount); 398 out: 399 mutex_unlock(&aift->aift_lock); 400 401 return rc; 402 } 403 404 static int kvm_s390_pci_dev_open(struct zpci_dev *zdev) 405 { 406 struct kvm_zdev *kzdev; 407 408 kzdev = kzalloc(sizeof(struct kvm_zdev), GFP_KERNEL); 409 if (!kzdev) 410 return -ENOMEM; 411 412 kzdev->zdev = zdev; 413 zdev->kzdev = kzdev; 414 415 return 0; 416 } 417 418 static void kvm_s390_pci_dev_release(struct zpci_dev *zdev) 419 { 420 struct kvm_zdev *kzdev; 421 422 kzdev = zdev->kzdev; 423 WARN_ON(kzdev->zdev != zdev); 424 zdev->kzdev = NULL; 425 kfree(kzdev); 426 } 427 428 429 /* 430 * Register device with the specified KVM. If interpetation facilities are 431 * available, enable them and let userspace indicate whether or not they will 432 * be used (specify SHM bit to disable). 433 */ 434 static int kvm_s390_pci_register_kvm(void *opaque, struct kvm *kvm) 435 { 436 struct zpci_dev *zdev = opaque; 437 int rc; 438 439 if (!zdev) 440 return -EINVAL; 441 442 mutex_lock(&zdev->kzdev_lock); 443 444 if (zdev->kzdev || zdev->gisa != 0 || !kvm) { 445 mutex_unlock(&zdev->kzdev_lock); 446 return -EINVAL; 447 } 448 449 kvm_get_kvm(kvm); 450 451 mutex_lock(&kvm->lock); 452 453 rc = kvm_s390_pci_dev_open(zdev); 454 if (rc) 455 goto err; 456 457 /* 458 * If interpretation facilities aren't available, add the device to 459 * the kzdev list but don't enable for interpretation. 460 */ 461 if (!kvm_s390_pci_interp_allowed()) 462 goto out; 463 464 /* 465 * If this is the first request to use an interpreted device, make the 466 * necessary vcpu changes 467 */ 468 if (!kvm->arch.use_zpci_interp) 469 kvm_s390_vcpu_pci_enable_interp(kvm); 470 471 if (zdev_enabled(zdev)) { 472 rc = zpci_disable_device(zdev); 473 if (rc) 474 goto err; 475 } 476 477 /* 478 * Store information about the identity of the kvm guest allowed to 479 * access this device via interpretation to be used by host CLP 480 */ 481 zdev->gisa = (u32)virt_to_phys(&kvm->arch.sie_page2->gisa); 482 483 rc = zpci_enable_device(zdev); 484 if (rc) 485 goto clear_gisa; 486 487 /* Re-register the IOMMU that was already created */ 488 rc = zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 489 virt_to_phys(zdev->dma_table)); 490 if (rc) 491 goto clear_gisa; 492 493 out: 494 zdev->kzdev->kvm = kvm; 495 496 spin_lock(&kvm->arch.kzdev_list_lock); 497 list_add_tail(&zdev->kzdev->entry, &kvm->arch.kzdev_list); 498 spin_unlock(&kvm->arch.kzdev_list_lock); 499 500 mutex_unlock(&kvm->lock); 501 mutex_unlock(&zdev->kzdev_lock); 502 return 0; 503 504 clear_gisa: 505 zdev->gisa = 0; 506 err: 507 if (zdev->kzdev) 508 kvm_s390_pci_dev_release(zdev); 509 mutex_unlock(&kvm->lock); 510 mutex_unlock(&zdev->kzdev_lock); 511 kvm_put_kvm(kvm); 512 return rc; 513 } 514 515 static void kvm_s390_pci_unregister_kvm(void *opaque) 516 { 517 struct zpci_dev *zdev = opaque; 518 struct kvm *kvm; 519 520 if (!zdev) 521 return; 522 523 mutex_lock(&zdev->kzdev_lock); 524 525 if (WARN_ON(!zdev->kzdev)) { 526 mutex_unlock(&zdev->kzdev_lock); 527 return; 528 } 529 530 kvm = zdev->kzdev->kvm; 531 mutex_lock(&kvm->lock); 532 533 /* 534 * A 0 gisa means interpretation was never enabled, just remove the 535 * device from the list. 536 */ 537 if (zdev->gisa == 0) 538 goto out; 539 540 /* Forwarding must be turned off before interpretation */ 541 if (zdev->kzdev->fib.fmt0.aibv != 0) 542 kvm_s390_pci_aif_disable(zdev, true); 543 544 /* Remove the host CLP guest designation */ 545 zdev->gisa = 0; 546 547 if (zdev_enabled(zdev)) { 548 if (zpci_disable_device(zdev)) 549 goto out; 550 } 551 552 if (zpci_enable_device(zdev)) 553 goto out; 554 555 /* Re-register the IOMMU that was already created */ 556 zpci_register_ioat(zdev, 0, zdev->start_dma, zdev->end_dma, 557 virt_to_phys(zdev->dma_table)); 558 559 out: 560 spin_lock(&kvm->arch.kzdev_list_lock); 561 list_del(&zdev->kzdev->entry); 562 spin_unlock(&kvm->arch.kzdev_list_lock); 563 kvm_s390_pci_dev_release(zdev); 564 565 mutex_unlock(&kvm->lock); 566 mutex_unlock(&zdev->kzdev_lock); 567 568 kvm_put_kvm(kvm); 569 } 570 571 void kvm_s390_pci_init_list(struct kvm *kvm) 572 { 573 spin_lock_init(&kvm->arch.kzdev_list_lock); 574 INIT_LIST_HEAD(&kvm->arch.kzdev_list); 575 } 576 577 void kvm_s390_pci_clear_list(struct kvm *kvm) 578 { 579 /* 580 * This list should already be empty, either via vfio device closures 581 * or kvm fd cleanup. 582 */ 583 spin_lock(&kvm->arch.kzdev_list_lock); 584 WARN_ON_ONCE(!list_empty(&kvm->arch.kzdev_list)); 585 spin_unlock(&kvm->arch.kzdev_list_lock); 586 } 587 588 static struct zpci_dev *get_zdev_from_kvm_by_fh(struct kvm *kvm, u32 fh) 589 { 590 struct zpci_dev *zdev = NULL; 591 struct kvm_zdev *kzdev; 592 593 spin_lock(&kvm->arch.kzdev_list_lock); 594 list_for_each_entry(kzdev, &kvm->arch.kzdev_list, entry) { 595 if (kzdev->zdev->fh == fh) { 596 zdev = kzdev->zdev; 597 break; 598 } 599 } 600 spin_unlock(&kvm->arch.kzdev_list_lock); 601 602 return zdev; 603 } 604 605 static int kvm_s390_pci_zpci_reg_aen(struct zpci_dev *zdev, 606 struct kvm_s390_zpci_op *args) 607 { 608 struct zpci_fib fib = {}; 609 bool hostflag; 610 611 fib.fmt0.aibv = args->u.reg_aen.ibv; 612 fib.fmt0.isc = args->u.reg_aen.isc; 613 fib.fmt0.noi = args->u.reg_aen.noi; 614 if (args->u.reg_aen.sb != 0) { 615 fib.fmt0.aisb = args->u.reg_aen.sb; 616 fib.fmt0.aisbo = args->u.reg_aen.sbo; 617 fib.fmt0.sum = 1; 618 } else { 619 fib.fmt0.aisb = 0; 620 fib.fmt0.aisbo = 0; 621 fib.fmt0.sum = 0; 622 } 623 624 hostflag = !(args->u.reg_aen.flags & KVM_S390_ZPCIOP_REGAEN_HOST); 625 return kvm_s390_pci_aif_enable(zdev, &fib, hostflag); 626 } 627 628 int kvm_s390_pci_zpci_op(struct kvm *kvm, struct kvm_s390_zpci_op *args) 629 { 630 struct kvm_zdev *kzdev; 631 struct zpci_dev *zdev; 632 int r; 633 634 zdev = get_zdev_from_kvm_by_fh(kvm, args->fh); 635 if (!zdev) 636 return -ENODEV; 637 638 mutex_lock(&zdev->kzdev_lock); 639 mutex_lock(&kvm->lock); 640 641 kzdev = zdev->kzdev; 642 if (!kzdev) { 643 r = -ENODEV; 644 goto out; 645 } 646 if (kzdev->kvm != kvm) { 647 r = -EPERM; 648 goto out; 649 } 650 651 switch (args->op) { 652 case KVM_S390_ZPCIOP_REG_AEN: 653 /* Fail on unknown flags */ 654 if (args->u.reg_aen.flags & ~KVM_S390_ZPCIOP_REGAEN_HOST) { 655 r = -EINVAL; 656 break; 657 } 658 r = kvm_s390_pci_zpci_reg_aen(zdev, args); 659 break; 660 case KVM_S390_ZPCIOP_DEREG_AEN: 661 r = kvm_s390_pci_aif_disable(zdev, false); 662 break; 663 default: 664 r = -EINVAL; 665 } 666 667 out: 668 mutex_unlock(&kvm->lock); 669 mutex_unlock(&zdev->kzdev_lock); 670 return r; 671 } 672 673 int kvm_s390_pci_init(void) 674 { 675 aift = kzalloc(sizeof(struct zpci_aift), GFP_KERNEL); 676 if (!aift) 677 return -ENOMEM; 678 679 spin_lock_init(&aift->gait_lock); 680 mutex_init(&aift->aift_lock); 681 zpci_kvm_hook.kvm_register = kvm_s390_pci_register_kvm; 682 zpci_kvm_hook.kvm_unregister = kvm_s390_pci_unregister_kvm; 683 684 return 0; 685 } 686 687 void kvm_s390_pci_exit(void) 688 { 689 mutex_destroy(&aift->aift_lock); 690 zpci_kvm_hook.kvm_register = NULL; 691 zpci_kvm_hook.kvm_unregister = NULL; 692 693 kfree(aift); 694 } 695