// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/intel-iommu.h>
#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>
#include <trace/events/intel_iommu.h>

#include "pasid.h"
#include "perf.h"
#include "../iommu-sva-lib.h"

static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)

static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
	return xa_alloc(&pasid_private_array, &pasid, priv,
			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
	xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
	return xa_load(&pasid_private_array, pasid);
}

static struct intel_svm_dev *
svm_lookup_device_by_sid(struct intel_svm *svm, u16 sid)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->sid == sid) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->dev == dev) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct iopf_queue *iopfq;
	struct page *pages;
	int irq, ret;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
		goto free_prq;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
		 "dmar%d-iopfq", iommu->seq_id);
	iopfq = iopf_queue_alloc(iommu->iopfq_name);
	if (!iopfq) {
		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
		ret = -ENOMEM;
		goto free_hwirq;
	}
	iommu->iopf_queue = iopfq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		goto free_iopfq;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	init_completion(&iommu->prq_complete);

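	/*
	 * At this point the page request queue is programmed into
	 * PQA/PQH/PQT, the threaded IRQ handler is installed and the
	 * iopf queue is ready, so page requests can be taken from here on.
	 */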
	return 0;

free_iopfq:
	iopf_queue_free(iommu->iopf_queue);
	iommu->iopf_queue = NULL;
free_hwirq:
	dmar_free_hwirq(irq);
	iommu->pr_irq = 0;
free_prq:
	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	if (iommu->pr_irq) {
		free_irq(iommu->pr_irq, iommu);
		dmar_free_hwirq(iommu->pr_irq);
		iommu->pr_irq = 0;
	}

	if (iommu->iopf_queue) {
		iopf_queue_free(iommu->iopf_queue);
		iommu->iopf_queue = NULL;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}

	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

static void __flush_svm_range_dev(struct intel_svm *svm,
				  struct intel_svm_dev *sdev,
				  unsigned long address,
				  unsigned long pages, int ih)
{
	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

	if (WARN_ON(!pages))
		return;

	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
	if (info->ats_enabled)
		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
					 svm->pasid, sdev->qdep, address,
					 order_base_2(pages));
}

static void intel_flush_svm_range_dev(struct intel_svm *svm,
				      struct intel_svm_dev *sdev,
				      unsigned long address,
				      unsigned long pages, int ih)
{
	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
	unsigned long start = ALIGN_DOWN(address, align);
	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

	while (start < end) {
		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
		start += align;
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				  unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *sdev;

	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared.
	 * And __mmu_notifier_release() will delete us from the list of
	 * notifiers so that our invalidate_range() callback doesn't get
	 * called when the page tables are cleared. So we need to protect
	 * against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
					    svm->pasid, true);
	rcu_read_unlock();

}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);

static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
			     struct intel_svm **rsvm,
			     struct intel_svm_dev **rsdev)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm;

	/* The caller should hold the pasid_mutex lock */
	if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
		return -EINVAL;

	if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
		return -EINVAL;

	svm = pasid_private_find(pasid);
	if (IS_ERR(svm))
		return PTR_ERR(svm);

	if (!svm)
		goto out;

	/*
	 * If we found svm for the PASID, there must be at least one device
	 * bond.
	 */
	if (WARN_ON(list_empty(&svm->devs)))
		return -EINVAL;
	sdev = svm_lookup_device_by_dev(svm, dev);

out:
	*rsvm = svm;
	*rsdev = sdev;

	return 0;
}

static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
				 unsigned int flags)
{
	ioasid_t max_pasid = dev_is_pci(dev) ?
			pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;

	return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
}

static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
					   struct device *dev,
					   struct mm_struct *mm,
					   unsigned int flags)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	unsigned long iflags, sflags;
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	int ret = 0;

	svm = pasid_private_find(mm->pasid);
	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm)
			return ERR_PTR(-ENOMEM);

		svm->pasid = mm->pasid;
		svm->mm = mm;
		svm->flags = flags;
		INIT_LIST_HEAD_RCU(&svm->devs);

		if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
			svm->notifier.ops = &intel_mmuops;
			ret = mmu_notifier_register(&svm->notifier, mm);
			if (ret) {
				kfree(svm);
				return ERR_PTR(ret);
			}
		}

		ret = pasid_private_add(svm->pasid, svm);
		if (ret) {
			if (svm->notifier.ops)
				mmu_notifier_unregister(&svm->notifier, mm);
			kfree(svm);
			return ERR_PTR(ret);
		}
	}

	/* Find the matching device in svm list */
	sdev = svm_lookup_device_by_dev(svm, dev);
	if (sdev) {
		sdev->users++;
		goto success;
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto free_svm;
	}

	sdev->dev = dev;
	sdev->iommu = iommu;
	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	sdev->users = 1;
	sdev->pasid = svm->pasid;
	sdev->sva.dev = dev;
	init_rcu_head(&sdev->rcu);
	if (info->ats_enabled) {
		sdev->dev_iotlb = 1;
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Setup the pasid table: */
	sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
			PASID_FLAG_SUPERVISOR_MODE : 0;
	sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
	spin_lock_irqsave(&iommu->lock, iflags);
	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
					    FLPT_DEFAULT_DID, sflags);
	spin_unlock_irqrestore(&iommu->lock, iflags);

	if (ret)
		goto free_sdev;

	list_add_rcu(&sdev->list, &svm->devs);
success:
	return &sdev->sva;

free_sdev:
	kfree(sdev);
free_svm:
	if (list_empty(&svm->devs)) {
		if (svm->notifier.ops)
			mmu_notifier_unregister(&svm->notifier, mm);
		pasid_private_remove(mm->pasid);
		kfree(svm);
	}

	return ERR_PTR(ret);
}

/* Caller must hold pasid_mutex */
static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_iommu *iommu;
	struct intel_svm *svm;
	struct mm_struct *mm;
	int ret = -EINVAL;

	iommu = device_to_iommu(dev, NULL, NULL);
	if (!iommu)
		goto out;

	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
	if (ret)
		goto out;
	mm = svm->mm;

	if (sdev) {
		sdev->users--;
		if (!sdev->users) {
			list_del_rcu(&sdev->list);
			/* Flush the PASID cache and IOTLB for this device.
			 * Note that we do depend on the hardware *not* using
			 * the PASID any more. Just as we depend on other
			 * devices never using PASIDs that they have no right
			 * to use. We have a *shared* PASID table, because it's
			 * large and has to be physically contiguous. So it's
			 * hard to be as defensive as we might like.
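			 *
			 * The teardown below is followed by a drain of the
			 * page request queue, so no page request for this
			 * PASID is still in flight when the sdev is freed.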
			 */
			intel_pasid_tear_down_entry(iommu, dev,
						    svm->pasid, false);
			intel_svm_drain_prq(dev, svm->pasid);
			kfree_rcu(sdev, rcu);

			if (list_empty(&svm->devs)) {
				if (svm->notifier.ops)
					mmu_notifier_unregister(&svm->notifier, mm);
				pasid_private_remove(svm->pasid);
				/* We mandate that no page faults may be outstanding
				 * for the PASID when intel_svm_unbind_mm() is called.
				 * If that is not obeyed, subtle errors will happen.
				 * Let's make them less subtle... */
				memset(svm, 0x6b, sizeof(*svm));
				kfree(svm);
			}
		}
	}
out:
	return ret;
}

/* Page request queue descriptor */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

static bool is_canonical_address(u64 addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long saddr = (long) addr;

	return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_svm_drain_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then it follows the
 * steps described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
static void intel_svm_drain_prq(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct qi_desc desc[3];
	struct pci_dev *pdev;
	int head, tail;
	u16 sid, did;
	int qdep;

	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return;

	if (!info->pri_enabled)
		return;

	iommu = info->iommu;
	domain = info->domain;
	pdev = to_pci_dev(dev);
	sid = PCI_DEVID(info->bus, info->devfn);
	did = domain->iommu_did[iommu->seq_id];
	qdep = pci_ats_queue_depth(pdev);

	/*
	 * Check and wait until all pending page requests in the queue are
	 * handled by the prq handling thread.
	 */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct page_req_dsc *req;

		req = &iommu->prq[head / sizeof(*req)];
		if (!req->pasid_present || req->pasid != pasid) {
			head = (head + sizeof(*req)) & PRQ_RING_MASK;
			continue;
		}

		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}

	/*
	 * A work in IO page fault workqueue may try to lock pasid_mutex now.
	 * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for
	 * all works in the workqueue to finish may cause deadlock.
	 *
	 * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
	 * Unlock it to allow the works to be handled while waiting for
	 * them to finish.
	 */
	lockdep_assert_held(&pasid_mutex);
	mutex_unlock(&pasid_mutex);
	iopf_queue_flush_dev(dev);
	mutex_lock(&pasid_mutex);

	/*
	 * Perform steps described in VT-d spec CH7.10 to drain page
	 * requests and responses in hardware.
	 */
	memset(desc, 0, sizeof(desc));
	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_FENCE |
			QI_IWD_TYPE;
	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
			QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
			QI_DEV_EIOTLB_SID(sid) |
			QI_DEV_EIOTLB_QDEP(qdep) |
			QI_DEIOTLB_TYPE |
			QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
	reinit_completion(&iommu->prq_complete);
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		wait_for_completion(&iommu->prq_complete);
		goto qi_retry;
	}
}

static int prq_to_iommu_prot(struct page_req_dsc *req)
{
	int prot = 0;

	if (req->rd_req)
		prot |= IOMMU_FAULT_PERM_READ;
	if (req->wr_req)
		prot |= IOMMU_FAULT_PERM_WRITE;
	if (req->exe_req)
		prot |= IOMMU_FAULT_PERM_EXEC;
	if (req->pm_req)
		prot |= IOMMU_FAULT_PERM_PRIV;

	return prot;
}

static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
				struct page_req_dsc *desc)
{
	struct iommu_fault_event event;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	/* Fill in event data for device specific processing */
	memset(&event, 0, sizeof(struct iommu_fault_event));
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
	event.fault.prm.pasid = desc->pasid;
	event.fault.prm.grpid = desc->prg_index;
	event.fault.prm.perm = prq_to_iommu_prot(desc);

	if (desc->lpig)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (desc->pasid_present) {
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
	}
	if (desc->priv_data_present) {
		/*
		 * If private data is present, a page response is required
		 * just as it is for LPIG, so set the last page in group bit.
		 * iommu_report_device_fault() doesn't understand this vendor
		 * specific requirement, thus we set last_page as a workaround.
		 */
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
		event.fault.prm.private_data[0] = desc->priv_data[0];
		event.fault.prm.private_data[1] = desc->priv_data[1];
	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
		/*
		 * If the private data fields are not used by hardware, use
		 * them to monitor the prq handling latency.
		 */
		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
	}

	return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
				 struct page_req_dsc *req, int result)
{
	struct qi_desc desc;

	pr_err("%s: Invalid page request: %08llx %08llx\n",
	       iommu->name, ((unsigned long long *)req)[0],
	       ((unsigned long long *)req)[1]);

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must
	 * respond with page group response if private data
	 * is present (PDP) or last page in group (LPIG) bit
	 * is set.
	 * This is an additional VT-d feature beyond the PCI ATS spec.
	 */
	if (!req->lpig && !req->priv_data_present)
		return;

	desc.qw0 = QI_PGRP_PASID(req->pasid) |
			QI_PGRP_DID(req->rid) |
			QI_PGRP_PASID_P(req->pasid_present) |
			QI_PGRP_PDP(req->priv_data_present) |
			QI_PGRP_RESP_CODE(result) |
			QI_PGRP_RESP_TYPE;
	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
			QI_PGRP_LPIG(req->lpig);

	if (req->priv_data_present) {
		desc.qw2 = req->priv_data[0];
		desc.qw3 = req->priv_data[1];
	} else {
		desc.qw2 = 0;
		desc.qw3 = 0;
	}

	qi_submit_sync(iommu, &desc, 1, 0);
}

static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_iommu *iommu = d;
	struct intel_svm *svm = NULL;
	struct page_req_dsc *req;
	int head, tail, handled;
	u64 address;

	/*
	 * Clear PPR bit before reading head/tail registers, to ensure that
	 * we get a new interrupt if needed.
	 */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	handled = (head != tail);
	while (head != tail) {
		req = &iommu->prq[head / sizeof(*req)];
		address = (u64)req->addr << VTD_PAGE_SHIFT;

		if (unlikely(!req->pasid_present)) {
			pr_err("IOMMU: %s: Page request without PASID\n",
			       iommu->name);
bad_req:
			svm = NULL;
			sdev = NULL;
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
			goto prq_advance;
		}

		if (unlikely(!is_canonical_address(address))) {
			pr_err("IOMMU: %s: Address is not canonical\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->exe_req && req->rd_req)) {
			pr_err("IOMMU: %s: Execution request not supported\n",
			       iommu->name);
			goto bad_req;
		}

		/* Drop Stop Marker message. No need for a response. */
		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
			goto prq_advance;

		if (!svm || svm->pasid != req->pasid) {
			/*
			 * It can't go away, because the driver is not permitted
			 * to unbind the mm while any page faults are outstanding.
			 */
			svm = pasid_private_find(req->pasid);
			if (IS_ERR_OR_NULL(svm) || (svm->flags & SVM_FLAG_SUPERVISOR_MODE))
				goto bad_req;
		}

		if (!sdev || sdev->sid != req->rid) {
			sdev = svm_lookup_device_by_sid(svm, req->rid);
			if (!sdev)
				goto bad_req;
		}

		sdev->prq_seq_number++;

		/*
		 * If the prq is to be handled outside the iommu driver, by the
		 * receiver of the fault notification, we skip the page response
		 * here.
		 */
		if (intel_svm_prq_report(iommu, sdev->dev, req))
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);

		trace_prq_report(iommu, sdev->dev, req->qw_0, req->qw_1,
				 req->priv_data[0], req->priv_data[1],
				 sdev->prq_seq_number);
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	/*
	 * Clear the page request overflow bit and wake up all threads that
	 * are waiting for the completion of this handling.
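	 *
	 * The PRO bit is only cleared once the queue is actually empty
	 * (head == tail); any partially collected fault groups are discarded
	 * first so the iopf layer doesn't wait for responses that will
	 * never arrive.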
	 */
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
				    iommu->name);
		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
		if (head == tail) {
			iopf_queue_discard_partial(iommu->iopf_queue);
			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared\n",
					    iommu->name);
		}
	}

	if (!completion_done(&iommu->prq_complete))
		complete(&iommu->prq_complete);

	return IRQ_RETVAL(handled);
}

struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
	unsigned int flags = 0;
	struct iommu_sva *sva;
	int ret;

	if (drvdata)
		flags = *(unsigned int *)drvdata;

	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			dev_err(dev, "%s: Supervisor PASID not supported\n",
				iommu->name);
			return ERR_PTR(-EOPNOTSUPP);
		}

		if (mm) {
			dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
				iommu->name);
			return ERR_PTR(-EINVAL);
		}

		mm = &init_mm;
	}

	mutex_lock(&pasid_mutex);
	ret = intel_svm_alloc_pasid(dev, mm, flags);
	if (ret) {
		mutex_unlock(&pasid_mutex);
		return ERR_PTR(ret);
	}

	sva = intel_svm_bind_mm(iommu, dev, mm, flags);
	mutex_unlock(&pasid_mutex);

	return sva;
}

void intel_svm_unbind(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev = to_intel_svm_dev(sva);

	mutex_lock(&pasid_mutex);
	intel_svm_unbind_mm(sdev->dev, sdev->pasid);
	mutex_unlock(&pasid_mutex);
}

u32 intel_svm_get_pasid(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev;
	u32 pasid;

	mutex_lock(&pasid_mutex);
	sdev = to_intel_svm_dev(sva);
	pasid = sdev->pasid;
	mutex_unlock(&pasid_mutex);

	return pasid;
}

int intel_svm_page_response(struct device *dev,
			    struct iommu_fault_event *evt,
			    struct iommu_page_response *msg)
{
	struct iommu_fault_page_request *prm;
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm = NULL;
	struct intel_iommu *iommu;
	bool private_present;
	bool pasid_present;
	bool last_page;
	u8 bus, devfn;
	int ret = 0;
	u16 sid;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!msg || !evt)
		return -EINVAL;

	mutex_lock(&pasid_mutex);

	prm = &evt->fault.prm;
	sid = PCI_DEVID(bus, devfn);
	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

	if (!pasid_present) {
		ret = -EINVAL;
		goto out;
	}

	if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
		ret = -EINVAL;
		goto out;
	}

	ret = pasid_to_svm_sdev(dev, prm->pasid, &svm, &sdev);
	if (ret || !sdev) {
		ret = -ENODEV;
		goto out;
	}

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must respond
	 * with page group response if private data is present (PDP)
	 * or last page in group (LPIG) bit is set. This is an
	 * additional VT-d requirement beyond PCI ATS spec.
	 */
	if (last_page || private_present) {
		struct qi_desc desc;

		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
				QI_PGRP_PASID_P(pasid_present) |
				QI_PGRP_PDP(private_present) |
				QI_PGRP_RESP_CODE(msg->code) |
				QI_PGRP_RESP_TYPE;
		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
		desc.qw2 = 0;
		desc.qw3 = 0;

		if (private_present) {
			desc.qw2 = prm->private_data[0];
			desc.qw3 = prm->private_data[1];
		} else if (prm->private_data[0]) {
			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
					    ktime_to_ns(ktime_get()) - prm->private_data[0]);
		}

		qi_submit_sync(iommu, &desc, 1, 0);
	}
out:
	mutex_unlock(&pasid_mutex);
	return ret;
}
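
/*
 * Illustrative usage sketch (not part of this file): device drivers normally
 * reach intel_svm_bind()/intel_svm_unbind() indirectly, through the generic
 * SVA API, after enabling the IOMMU_DEV_FEAT_SVA feature on the device. A
 * minimal, hypothetical example for a PASID-capable PCI device "pdev" bound
 * to the current process address space might look like:
 *
 *	struct iommu_sva *handle;
 *	u32 pasid;
 *
 *	handle = iommu_sva_bind_device(&pdev->dev, current->mm, NULL);
 *	if (IS_ERR(handle))
 *		return PTR_ERR(handle);
 *
 *	pasid = iommu_sva_get_pasid(handle);
 *	(program the PASID into the device and start DMA on the mm)
 *
 *	iommu_sva_unbind_device(handle);
 */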