// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva-lib.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)

static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
	return xa_alloc(&pasid_private_array, &pasid, priv,
			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
	xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
	return xa_load(&pasid_private_array, pasid);
}

static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->dev == dev) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct iopf_queue *iopfq;
	struct page *pages;
	int irq, ret;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
		goto free_prq;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
		 "dmar%d-iopfq", iommu->seq_id);
	iopfq = iopf_queue_alloc(iommu->iopfq_name);
	if (!iopfq) {
		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
		ret = -ENOMEM;
		goto free_hwirq;
	}
	iommu->iopf_queue = iopfq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		goto free_iopfq;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	init_completion(&iommu->prq_complete);

	return 0;

free_iopfq:
	iopf_queue_free(iommu->iopf_queue);
	iommu->iopf_queue = NULL;
free_hwirq:
	dmar_free_hwirq(irq);
	iommu->pr_irq = 0;
free_prq:
	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return ret;
}
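
/*
 * Undo intel_svm_enable_prq(): clear the page request queue registers,
 * release the PRQ interrupt and IOPF queue, and free the queue pages.
 */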
int intel_svm_finish_prq(struct intel_iommu *iommu)
{
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	if (iommu->pr_irq) {
		free_irq(iommu->pr_irq, iommu);
		dmar_free_hwirq(iommu->pr_irq);
		iommu->pr_irq = 0;
	}

	if (iommu->iopf_queue) {
		iopf_queue_free(iommu->iopf_queue);
		iommu->iopf_queue = NULL;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_fl5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}

	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

static void __flush_svm_range_dev(struct intel_svm *svm,
				  struct intel_svm_dev *sdev,
				  unsigned long address,
				  unsigned long pages, int ih)
{
	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

	if (WARN_ON(!pages))
		return;

	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
	if (info->ats_enabled)
		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
					 svm->pasid, sdev->qdep, address,
					 order_base_2(pages));
}

static void intel_flush_svm_range_dev(struct intel_svm *svm,
				      struct intel_svm_dev *sdev,
				      unsigned long address,
				      unsigned long pages, int ih)
{
	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
	unsigned long start = ALIGN_DOWN(address, align);
	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

	while (start < end) {
		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
		start += align;
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				  unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *sdev;

	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared. And __mmu_notifier_release() will delete us from
	 * the list of notifiers so that our invalidate_range() callback doesn't
	 * get called when the page tables are cleared. So we need to protect
	 * against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
					    svm->pasid, true);
	rcu_read_unlock();
}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);

static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
			     struct intel_svm **rsvm,
			     struct intel_svm_dev **rsdev)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm;

	/* The caller should hold the pasid_mutex lock */
	if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
		return -EINVAL;

	if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
		return -EINVAL;

	svm = pasid_private_find(pasid);
	if (IS_ERR(svm))
		return PTR_ERR(svm);

	if (!svm)
		goto out;

	/*
	 * If we found svm for the PASID, there must be at least one device
	 * bond.
	 */
	if (WARN_ON(list_empty(&svm->devs)))
		return -EINVAL;
	sdev = svm_lookup_device_by_dev(svm, dev);

out:
	*rsvm = svm;
	*rsdev = sdev;

	return 0;
}

static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
				 unsigned int flags)
{
	ioasid_t max_pasid = dev_is_pci(dev) ?
			pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;

	return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
}

static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
					   struct device *dev,
					   struct mm_struct *mm,
					   unsigned int flags)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	unsigned long sflags;
	int ret = 0;

	svm = pasid_private_find(mm->pasid);
	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm)
			return ERR_PTR(-ENOMEM);

		svm->pasid = mm->pasid;
		svm->mm = mm;
		svm->flags = flags;
		INIT_LIST_HEAD_RCU(&svm->devs);

		if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
			svm->notifier.ops = &intel_mmuops;
			ret = mmu_notifier_register(&svm->notifier, mm);
			if (ret) {
				kfree(svm);
				return ERR_PTR(ret);
			}
		}

		ret = pasid_private_add(svm->pasid, svm);
		if (ret) {
			if (svm->notifier.ops)
				mmu_notifier_unregister(&svm->notifier, mm);
			kfree(svm);
			return ERR_PTR(ret);
		}
	}

	/* Find the matching device in svm list */
	sdev = svm_lookup_device_by_dev(svm, dev);
	if (sdev) {
		sdev->users++;
		goto success;
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto free_svm;
	}

	sdev->dev = dev;
	sdev->iommu = iommu;
	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	sdev->users = 1;
	sdev->pasid = svm->pasid;
	sdev->sva.dev = dev;
	init_rcu_head(&sdev->rcu);
	if (info->ats_enabled) {
		sdev->dev_iotlb = 1;
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Setup the pasid table: */
	sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
			PASID_FLAG_SUPERVISOR_MODE : 0;
	sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
					    FLPT_DEFAULT_DID, sflags);
	if (ret)
		goto free_sdev;

	list_add_rcu(&sdev->list, &svm->devs);
success:
	return &sdev->sva;

free_sdev:
	kfree(sdev);
free_svm:
	if (list_empty(&svm->devs)) {
		if (svm->notifier.ops)
			mmu_notifier_unregister(&svm->notifier, mm);
		pasid_private_remove(mm->pasid);
		kfree(svm);
	}

	return ERR_PTR(ret);
}

/* Caller must hold pasid_mutex */
static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_iommu *iommu;
	struct intel_svm *svm;
	struct mm_struct *mm;
	int ret = -EINVAL;

	iommu = device_to_iommu(dev, NULL, NULL);
	if (!iommu)
		goto out;

	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
	if (ret)
		goto out;
	mm = svm->mm;

	if (sdev) {
		sdev->users--;
		if (!sdev->users) {
			list_del_rcu(&sdev->list);
			/* Flush the PASID cache and IOTLB for this device.
			 * Note that we do depend on the hardware *not* using
			 * the PASID any more. Just as we depend on other
			 * devices never using PASIDs that they have no right
			 * to use. We have a *shared* PASID table, because it's
			 * large and has to be physically contiguous. So it's
			 * hard to be as defensive as we might like. */
			intel_pasid_tear_down_entry(iommu, dev,
						    svm->pasid, false);
			intel_svm_drain_prq(dev, svm->pasid);
			kfree_rcu(sdev, rcu);

			if (list_empty(&svm->devs)) {
				if (svm->notifier.ops)
					mmu_notifier_unregister(&svm->notifier, mm);
				pasid_private_remove(svm->pasid);
				/* We mandate that no page faults may be outstanding
				 * for the PASID when intel_svm_unbind_mm() is called.
				 * If that is not obeyed, subtle errors will happen.
				 * Let's make them less subtle... */
				memset(svm, 0x6b, sizeof(*svm));
				kfree(svm);
			}
		}
	}
out:
	return ret;
}

/* Page request queue descriptor */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

static bool is_canonical_address(u64 addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long saddr = (long) addr;

	return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_svm_drain_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then follow the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
 */
static void intel_svm_drain_prq(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct qi_desc desc[3];
	struct pci_dev *pdev;
	int head, tail;
	u16 sid, did;
	int qdep;

	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return;

	if (!info->pri_enabled)
		return;

	iommu = info->iommu;
	domain = info->domain;
	pdev = to_pci_dev(dev);
	sid = PCI_DEVID(info->bus, info->devfn);
	did = domain_id_iommu(domain, iommu);
	qdep = pci_ats_queue_depth(pdev);

	/*
	 * Check and wait until all pending page requests in the queue are
	 * handled by the prq handling thread.
	 */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct page_req_dsc *req;

		req = &iommu->prq[head / sizeof(*req)];
		if (!req->pasid_present || req->pasid != pasid) {
			head = (head + sizeof(*req)) & PRQ_RING_MASK;
			continue;
		}

		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}

	/*
	 * A work in IO page fault workqueue may try to lock pasid_mutex now.
	 * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for
	 * all works in the workqueue to finish may cause deadlock.
	 *
	 * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
	 * Unlock it to allow the works to be handled while waiting for
	 * them to finish.
	 */
	lockdep_assert_held(&pasid_mutex);
	mutex_unlock(&pasid_mutex);
	iopf_queue_flush_dev(dev);
	mutex_lock(&pasid_mutex);

	/*
	 * Perform steps described in VT-d spec CH7.10 to drain page
	 * requests and responses in hardware.
	 */
	memset(desc, 0, sizeof(desc));
	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_FENCE |
			QI_IWD_TYPE;
	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
			QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
			QI_DEV_EIOTLB_SID(sid) |
			QI_DEV_EIOTLB_QDEP(qdep) |
			QI_DEIOTLB_TYPE |
			QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
	reinit_completion(&iommu->prq_complete);
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		wait_for_completion(&iommu->prq_complete);
		goto qi_retry;
	}
}
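
/*
 * Convert the read/write/execute/privileged bits of a page request
 * descriptor into the generic IOMMU_FAULT_PERM_* flags used when the
 * request is reported through the IOMMU fault framework.
 */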
static int prq_to_iommu_prot(struct page_req_dsc *req)
{
	int prot = 0;

	if (req->rd_req)
		prot |= IOMMU_FAULT_PERM_READ;
	if (req->wr_req)
		prot |= IOMMU_FAULT_PERM_WRITE;
	if (req->exe_req)
		prot |= IOMMU_FAULT_PERM_EXEC;
	if (req->pm_req)
		prot |= IOMMU_FAULT_PERM_PRIV;

	return prot;
}

static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
				struct page_req_dsc *desc)
{
	struct iommu_fault_event event;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	/* Fill in event data for device specific processing */
	memset(&event, 0, sizeof(struct iommu_fault_event));
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
	event.fault.prm.pasid = desc->pasid;
	event.fault.prm.grpid = desc->prg_index;
	event.fault.prm.perm = prq_to_iommu_prot(desc);

	if (desc->lpig)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (desc->pasid_present) {
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
	}
	if (desc->priv_data_present) {
		/*
		 * Set last page in group bit if private data is present,
		 * page response is required as it does for LPIG.
		 * iommu_report_device_fault() doesn't understand this vendor
		 * specific requirement thus we set last_page as a workaround.
		 */
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
		event.fault.prm.private_data[0] = desc->priv_data[0];
		event.fault.prm.private_data[1] = desc->priv_data[1];
	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
		/*
		 * If the private data fields are not used by hardware, use it
		 * to monitor the prq handle latency.
		 */
		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
	}

	return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
				 struct page_req_dsc *req, int result)
{
	struct qi_desc desc;

	pr_err("%s: Invalid page request: %08llx %08llx\n",
	       iommu->name, ((unsigned long long *)req)[0],
	       ((unsigned long long *)req)[1]);

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must
	 * respond with page group response if private data
	 * is present (PDP) or last page in group (LPIG) bit
	 * is set. This is an additional VT-d feature beyond
	 * PCI ATS spec.
	 */
	if (!req->lpig && !req->priv_data_present)
		return;

	desc.qw0 = QI_PGRP_PASID(req->pasid) |
			QI_PGRP_DID(req->rid) |
			QI_PGRP_PASID_P(req->pasid_present) |
			QI_PGRP_PDP(req->priv_data_present) |
			QI_PGRP_RESP_CODE(result) |
			QI_PGRP_RESP_TYPE;
	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
			QI_PGRP_LPIG(req->lpig);

	if (req->priv_data_present) {
		desc.qw2 = req->priv_data[0];
		desc.qw3 = req->priv_data[1];
	} else {
		desc.qw2 = 0;
		desc.qw3 = 0;
	}

	qi_submit_sync(iommu, &desc, 1, 0);
}
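
/*
 * Page request queue interrupt thread: acknowledge the pending page
 * request (PPR) status, walk the queue from head to tail, sanity check
 * each descriptor and report it through the IOMMU fault framework (or
 * answer it with an invalid response when it cannot be reported), then
 * update the head register and recover from a queue overflow if the
 * hardware has flagged one.
 */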
static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_iommu *iommu = d;
	struct page_req_dsc *req;
	int head, tail, handled;
	struct pci_dev *pdev;
	u64 address;

	/*
	 * Clear PPR bit before reading head/tail registers, to ensure that
	 * we get a new interrupt if needed.
	 */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	handled = (head != tail);
	while (head != tail) {
		req = &iommu->prq[head / sizeof(*req)];
		address = (u64)req->addr << VTD_PAGE_SHIFT;

		if (unlikely(!req->pasid_present)) {
			pr_err("IOMMU: %s: Page request without PASID\n",
			       iommu->name);
bad_req:
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
			goto prq_advance;
		}

		if (unlikely(!is_canonical_address(address))) {
			pr_err("IOMMU: %s: Address is not canonical\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->exe_req && req->rd_req)) {
			pr_err("IOMMU: %s: Execution request not supported\n",
			       iommu->name);
			goto bad_req;
		}

		/* Drop Stop Marker message. No need for a response. */
		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
			goto prq_advance;

		pdev = pci_get_domain_bus_and_slot(iommu->segment,
						   PCI_BUS_NUM(req->rid),
						   req->rid & 0xff);
		/*
		 * If prq is to be handled outside iommu driver via receiver of
		 * the fault notifiers, we skip the page response here.
		 */
		if (!pdev || intel_svm_prq_report(iommu, &pdev->dev, req))
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);

		trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
				 req->priv_data[0], req->priv_data[1],
				 iommu->prq_seq_number++);
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	/*
	 * Clear the page request overflow bit and wake up all threads that
	 * are waiting for the completion of this handling.
	 */
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
				    iommu->name);
		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
		if (head == tail) {
			iopf_queue_discard_partial(iommu->iopf_queue);
			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
					    iommu->name);
		}
	}

	if (!completion_done(&iommu->prq_complete))
		complete(&iommu->prq_complete);

	return IRQ_RETVAL(handled);
}
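
/*
 * intel_svm_bind()/intel_svm_unbind() back the kernel's shared virtual
 * addressing API (iommu_sva_bind_device() and friends): a device driver
 * binds an mm to a PASID, programs that PASID into its DMA transactions,
 * and drops the bond when it is done. The optional drvdata points to
 * SVM_FLAG_* flags, e.g. SVM_FLAG_SUPERVISOR_MODE to request a kernel
 * (init_mm) PASID on hardware with supervisor request support.
 */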
struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
	unsigned int flags = 0;
	struct iommu_sva *sva;
	int ret;

	if (drvdata)
		flags = *(unsigned int *)drvdata;

	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			dev_err(dev, "%s: Supervisor PASID not supported\n",
				iommu->name);
			return ERR_PTR(-EOPNOTSUPP);
		}

		if (mm) {
			dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
				iommu->name);
			return ERR_PTR(-EINVAL);
		}

		mm = &init_mm;
	}

	mutex_lock(&pasid_mutex);
	ret = intel_svm_alloc_pasid(dev, mm, flags);
	if (ret) {
		mutex_unlock(&pasid_mutex);
		return ERR_PTR(ret);
	}

	sva = intel_svm_bind_mm(iommu, dev, mm, flags);
	mutex_unlock(&pasid_mutex);

	return sva;
}

void intel_svm_unbind(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev = to_intel_svm_dev(sva);

	mutex_lock(&pasid_mutex);
	intel_svm_unbind_mm(sdev->dev, sdev->pasid);
	mutex_unlock(&pasid_mutex);
}

u32 intel_svm_get_pasid(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev;
	u32 pasid;

	mutex_lock(&pasid_mutex);
	sdev = to_intel_svm_dev(sva);
	pasid = sdev->pasid;
	mutex_unlock(&pasid_mutex);

	return pasid;
}
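
/*
 * Send the page group response for a previously reported page request:
 * validate the PASID carried by the original fault event and, when the
 * request had the LPIG or private-data bits set, build a page group
 * response descriptor and submit it through the invalidation queue.
 */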
int intel_svm_page_response(struct device *dev,
			    struct iommu_fault_event *evt,
			    struct iommu_page_response *msg)
{
	struct iommu_fault_page_request *prm;
	struct intel_iommu *iommu;
	bool private_present;
	bool pasid_present;
	bool last_page;
	u8 bus, devfn;
	int ret = 0;
	u16 sid;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!msg || !evt)
		return -EINVAL;

	prm = &evt->fault.prm;
	sid = PCI_DEVID(bus, devfn);
	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

	if (!pasid_present) {
		ret = -EINVAL;
		goto out;
	}

	if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must respond
	 * with page group response if private data is present (PDP)
	 * or last page in group (LPIG) bit is set. This is an
	 * additional VT-d requirement beyond PCI ATS spec.
	 */
	if (last_page || private_present) {
		struct qi_desc desc;

		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
				QI_PGRP_PASID_P(pasid_present) |
				QI_PGRP_PDP(private_present) |
				QI_PGRP_RESP_CODE(msg->code) |
				QI_PGRP_RESP_TYPE;
		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
		desc.qw2 = 0;
		desc.qw3 = 0;

		if (private_present) {
			desc.qw2 = prm->private_data[0];
			desc.qw3 = prm->private_data[1];
		} else if (prm->private_data[0]) {
			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
				ktime_to_ns(ktime_get()) - prm->private_data[0]);
		}

		qi_submit_sync(iommu, &desc, 1, 0);
	}
out:
	return ret;
}