// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright © 2015 Intel Corporation.
 *
 * Authors: David Woodhouse <dwmw2@infradead.org>
 */

#include <linux/mmu_notifier.h>
#include <linux/sched.h>
#include <linux/sched/mm.h>
#include <linux/slab.h>
#include <linux/intel-svm.h>
#include <linux/rculist.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/dmar.h>
#include <linux/interrupt.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>
#include <linux/ioasid.h>
#include <asm/page.h>
#include <asm/fpu/api.h>

#include "iommu.h"
#include "pasid.h"
#include "perf.h"
#include "../iommu-sva-lib.h"
#include "trace.h"

static irqreturn_t prq_event_thread(int irq, void *d);
static void intel_svm_drain_prq(struct device *dev, u32 pasid);
#define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva)

static DEFINE_XARRAY_ALLOC(pasid_private_array);
static int pasid_private_add(ioasid_t pasid, void *priv)
{
	return xa_alloc(&pasid_private_array, &pasid, priv,
			XA_LIMIT(pasid, pasid), GFP_ATOMIC);
}

static void pasid_private_remove(ioasid_t pasid)
{
	xa_erase(&pasid_private_array, pasid);
}

static void *pasid_private_find(ioasid_t pasid)
{
	return xa_load(&pasid_private_array, pasid);
}

static struct intel_svm_dev *
svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev)
{
	struct intel_svm_dev *sdev = NULL, *t;

	rcu_read_lock();
	list_for_each_entry_rcu(t, &svm->devs, list) {
		if (t->dev == dev) {
			sdev = t;
			break;
		}
	}
	rcu_read_unlock();

	return sdev;
}

int intel_svm_enable_prq(struct intel_iommu *iommu)
{
	struct iopf_queue *iopfq;
	struct page *pages;
	int irq, ret;

	pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER);
	if (!pages) {
		pr_warn("IOMMU: %s: Failed to allocate page request queue\n",
			iommu->name);
		return -ENOMEM;
	}
	iommu->prq = page_address(pages);

	irq = dmar_alloc_hwirq(DMAR_UNITS_SUPPORTED + iommu->seq_id, iommu->node, iommu);
	if (irq <= 0) {
		pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n",
		       iommu->name);
		ret = -EINVAL;
		goto free_prq;
	}
	iommu->pr_irq = irq;

	snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name),
		 "dmar%d-iopfq", iommu->seq_id);
	iopfq = iopf_queue_alloc(iommu->iopfq_name);
	if (!iopfq) {
		pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name);
		ret = -ENOMEM;
		goto free_hwirq;
	}
	iommu->iopf_queue = iopfq;

	snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id);

	ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT,
				   iommu->prq_name, iommu);
	if (ret) {
		pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n",
		       iommu->name);
		goto free_iopfq;
	}
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER);

	init_completion(&iommu->prq_complete);

	return 0;

free_iopfq:
	iopf_queue_free(iommu->iopf_queue);
	iommu->iopf_queue = NULL;
free_hwirq:
	dmar_free_hwirq(irq);
	iommu->pr_irq = 0;
free_prq:
	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return ret;
}

int intel_svm_finish_prq(struct intel_iommu *iommu)
{
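	/*
	 * Quiesce the queue in hardware first (clear the head, tail and
	 * address registers) before releasing the interrupt, the iopf
	 * queue and the queue pages.
	 */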
	dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL);
	dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL);

	if (iommu->pr_irq) {
		free_irq(iommu->pr_irq, iommu);
		dmar_free_hwirq(iommu->pr_irq);
		iommu->pr_irq = 0;
	}

	if (iommu->iopf_queue) {
		iopf_queue_free(iommu->iopf_queue);
		iommu->iopf_queue = NULL;
	}

	free_pages((unsigned long)iommu->prq, PRQ_ORDER);
	iommu->prq = NULL;

	return 0;
}

void intel_svm_check(struct intel_iommu *iommu)
{
	if (!pasid_supported(iommu))
		return;

	if (cpu_feature_enabled(X86_FEATURE_GBPAGES) &&
	    !cap_fl1gp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible 1GB page capability\n",
		       iommu->name);
		return;
	}

	if (cpu_feature_enabled(X86_FEATURE_LA57) &&
	    !cap_fl5lp_support(iommu->cap)) {
		pr_err("%s SVM disabled, incompatible paging mode\n",
		       iommu->name);
		return;
	}

	iommu->flags |= VTD_FLAG_SVM_CAPABLE;
}

static void __flush_svm_range_dev(struct intel_svm *svm,
				  struct intel_svm_dev *sdev,
				  unsigned long address,
				  unsigned long pages, int ih)
{
	struct device_domain_info *info = dev_iommu_priv_get(sdev->dev);

	if (WARN_ON(!pages))
		return;

	qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih);
	if (info->ats_enabled) {
		qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid,
					 svm->pasid, sdev->qdep, address,
					 order_base_2(pages));
		quirk_extra_dev_tlb_flush(info, address, order_base_2(pages),
					  svm->pasid, sdev->qdep);
	}
}

static void intel_flush_svm_range_dev(struct intel_svm *svm,
				      struct intel_svm_dev *sdev,
				      unsigned long address,
				      unsigned long pages, int ih)
{
	unsigned long shift = ilog2(__roundup_pow_of_two(pages));
	unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift));
	unsigned long start = ALIGN_DOWN(address, align);
	unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align);

	while (start < end) {
		__flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih);
		start += align;
	}
}

static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address,
				  unsigned long pages, int ih)
{
	struct intel_svm_dev *sdev;

	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_flush_svm_range_dev(svm, sdev, address, pages, ih);
	rcu_read_unlock();
}

/* Pages have been freed at this point */
static void intel_invalidate_range(struct mmu_notifier *mn,
				   struct mm_struct *mm,
				   unsigned long start, unsigned long end)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);

	intel_flush_svm_range(svm, start,
			      (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0);
}

static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm)
{
	struct intel_svm *svm = container_of(mn, struct intel_svm, notifier);
	struct intel_svm_dev *sdev;

	/* This might end up being called from exit_mmap(), *before* the page
	 * tables are cleared. And __mmu_notifier_release() will delete us from
	 * the list of notifiers so that our invalidate_range() callback doesn't
	 * get called when the page tables are cleared. So we need to protect
	 * against hardware accessing those page tables.
	 *
	 * We do it by clearing the entry in the PASID table and then flushing
	 * the IOTLB and the PASID table caches. This might upset hardware;
	 * perhaps we'll want to point the PASID to a dummy PGD (like the zero
	 * page) so that we end up taking a fault that the hardware really
	 * *has* to handle gracefully without affecting other processes.
	 */
	rcu_read_lock();
	list_for_each_entry_rcu(sdev, &svm->devs, list)
		intel_pasid_tear_down_entry(sdev->iommu, sdev->dev,
					    svm->pasid, true);
	rcu_read_unlock();

}

static const struct mmu_notifier_ops intel_mmuops = {
	.release = intel_mm_release,
	.invalidate_range = intel_invalidate_range,
};

static DEFINE_MUTEX(pasid_mutex);

static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid,
			     struct intel_svm **rsvm,
			     struct intel_svm_dev **rsdev)
{
	struct intel_svm_dev *sdev = NULL;
	struct intel_svm *svm;

	/* The caller should hold the pasid_mutex lock */
	if (WARN_ON(!mutex_is_locked(&pasid_mutex)))
		return -EINVAL;

	if (pasid == INVALID_IOASID || pasid >= PASID_MAX)
		return -EINVAL;

	svm = pasid_private_find(pasid);
	if (IS_ERR(svm))
		return PTR_ERR(svm);

	if (!svm)
		goto out;

	/*
	 * If we found svm for the PASID, there must be at least one device
	 * bond.
	 */
	if (WARN_ON(list_empty(&svm->devs)))
		return -EINVAL;
	sdev = svm_lookup_device_by_dev(svm, dev);

out:
	*rsvm = svm;
	*rsdev = sdev;

	return 0;
}

static int intel_svm_alloc_pasid(struct device *dev, struct mm_struct *mm,
				 unsigned int flags)
{
	ioasid_t max_pasid = dev_is_pci(dev) ?
		pci_max_pasids(to_pci_dev(dev)) : intel_pasid_max_id;

	return iommu_sva_alloc_pasid(mm, PASID_MIN, max_pasid - 1);
}

static struct iommu_sva *intel_svm_bind_mm(struct intel_iommu *iommu,
					   struct device *dev,
					   struct mm_struct *mm,
					   unsigned int flags)
{
	struct device_domain_info *info = dev_iommu_priv_get(dev);
	struct intel_svm_dev *sdev;
	struct intel_svm *svm;
	unsigned long sflags;
	int ret = 0;

	svm = pasid_private_find(mm->pasid);
	if (!svm) {
		svm = kzalloc(sizeof(*svm), GFP_KERNEL);
		if (!svm)
			return ERR_PTR(-ENOMEM);

		svm->pasid = mm->pasid;
		svm->mm = mm;
		svm->flags = flags;
		INIT_LIST_HEAD_RCU(&svm->devs);

		if (!(flags & SVM_FLAG_SUPERVISOR_MODE)) {
			svm->notifier.ops = &intel_mmuops;
			ret = mmu_notifier_register(&svm->notifier, mm);
			if (ret) {
				kfree(svm);
				return ERR_PTR(ret);
			}
		}

		ret = pasid_private_add(svm->pasid, svm);
		if (ret) {
			if (svm->notifier.ops)
				mmu_notifier_unregister(&svm->notifier, mm);
			kfree(svm);
			return ERR_PTR(ret);
		}
	}

	/* Find the matching device in svm list */
	sdev = svm_lookup_device_by_dev(svm, dev);
	if (sdev) {
		sdev->users++;
		goto success;
	}

	sdev = kzalloc(sizeof(*sdev), GFP_KERNEL);
	if (!sdev) {
		ret = -ENOMEM;
		goto free_svm;
	}

	sdev->dev = dev;
	sdev->iommu = iommu;
	sdev->did = FLPT_DEFAULT_DID;
	sdev->sid = PCI_DEVID(info->bus, info->devfn);
	sdev->users = 1;
	sdev->pasid = svm->pasid;
	sdev->sva.dev = dev;
	init_rcu_head(&sdev->rcu);
	if (info->ats_enabled) {
		sdev->dev_iotlb = 1;
		sdev->qdep = info->ats_qdep;
		if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS)
			sdev->qdep = 0;
	}

	/* Setup the pasid table: */
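	/*
	 * The first-level PASID entry set up below points directly at
	 * mm->pgd, so the IOMMU shares the CPU page tables; sflags selects
	 * supervisor mode and 5-level paging where the CPU uses them.
	 */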
	sflags = (flags & SVM_FLAG_SUPERVISOR_MODE) ?
			PASID_FLAG_SUPERVISOR_MODE : 0;
	sflags |= cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0;
	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid,
					    FLPT_DEFAULT_DID, sflags);
	if (ret)
		goto free_sdev;

	list_add_rcu(&sdev->list, &svm->devs);
success:
	return &sdev->sva;

free_sdev:
	kfree(sdev);
free_svm:
	if (list_empty(&svm->devs)) {
		if (svm->notifier.ops)
			mmu_notifier_unregister(&svm->notifier, mm);
		pasid_private_remove(mm->pasid);
		kfree(svm);
	}

	return ERR_PTR(ret);
}

/* Caller must hold pasid_mutex */
static int intel_svm_unbind_mm(struct device *dev, u32 pasid)
{
	struct intel_svm_dev *sdev;
	struct intel_iommu *iommu;
	struct intel_svm *svm;
	struct mm_struct *mm;
	int ret = -EINVAL;

	iommu = device_to_iommu(dev, NULL, NULL);
	if (!iommu)
		goto out;

	ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev);
	if (ret)
		goto out;
	mm = svm->mm;

	if (sdev) {
		sdev->users--;
		if (!sdev->users) {
			list_del_rcu(&sdev->list);
			/* Flush the PASID cache and IOTLB for this device.
			 * Note that we do depend on the hardware *not* using
			 * the PASID any more. Just as we depend on other
			 * devices never using PASIDs that they have no right
			 * to use. We have a *shared* PASID table, because it's
			 * large and has to be physically contiguous. So it's
			 * hard to be as defensive as we might like. */
			intel_pasid_tear_down_entry(iommu, dev,
						    svm->pasid, false);
			intel_svm_drain_prq(dev, svm->pasid);
			kfree_rcu(sdev, rcu);

			if (list_empty(&svm->devs)) {
				if (svm->notifier.ops)
					mmu_notifier_unregister(&svm->notifier, mm);
				pasid_private_remove(svm->pasid);
				/* We mandate that no page faults may be outstanding
				 * for the PASID when intel_svm_unbind_mm() is called.
				 * If that is not obeyed, subtle errors will happen.
				 * Let's make them less subtle... */
				memset(svm, 0x6b, sizeof(*svm));
				kfree(svm);
			}
		}
	}
out:
	return ret;
}

/* Page request queue descriptor */
struct page_req_dsc {
	union {
		struct {
			u64 type:8;
			u64 pasid_present:1;
			u64 priv_data_present:1;
			u64 rsvd:6;
			u64 rid:16;
			u64 pasid:20;
			u64 exe_req:1;
			u64 pm_req:1;
			u64 rsvd2:10;
		};
		u64 qw_0;
	};
	union {
		struct {
			u64 rd_req:1;
			u64 wr_req:1;
			u64 lpig:1;
			u64 prg_index:9;
			u64 addr:52;
		};
		u64 qw_1;
	};
	u64 priv_data[2];
};

static bool is_canonical_address(u64 addr)
{
	int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1);
	long saddr = (long) addr;

	return (((saddr << shift) >> shift) == saddr);
}

/**
 * intel_svm_drain_prq - Drain page requests and responses for a pasid
 * @dev: target device
 * @pasid: pasid for draining
 *
 * Drain all pending page requests and responses related to @pasid in both
 * software and hardware. This is supposed to be called after the device
 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB
 * and DevTLB have been invalidated.
 *
 * It waits until all pending page requests for @pasid in the page fault
 * queue are completed by the prq handling thread. Then follow the steps
 * described in VT-d spec CH7.10 to drain all page requests and page
 * responses pending in the hardware.
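 *
 * The software stage scans the queue from head to tail and, whenever a
 * request carrying @pasid is still queued, waits for the prq thread to
 * signal prq_complete before rescanning. The hardware stage submits a
 * fenced invalidation wait descriptor followed by PASID-based IOTLB and
 * device-TLB invalidations with the drain option, retrying while the Page
 * Request Overflow bit remains set.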
 */
static void intel_svm_drain_prq(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct dmar_domain *domain;
	struct intel_iommu *iommu;
	struct qi_desc desc[3];
	struct pci_dev *pdev;
	int head, tail;
	u16 sid, did;
	int qdep;

	info = dev_iommu_priv_get(dev);
	if (WARN_ON(!info || !dev_is_pci(dev)))
		return;

	if (!info->pri_enabled)
		return;

	iommu = info->iommu;
	domain = info->domain;
	pdev = to_pci_dev(dev);
	sid = PCI_DEVID(info->bus, info->devfn);
	did = domain_id_iommu(domain, iommu);
	qdep = pci_ats_queue_depth(pdev);

	/*
	 * Check and wait until all pending page requests in the queue are
	 * handled by the prq handling thread.
	 */
prq_retry:
	reinit_completion(&iommu->prq_complete);
	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	while (head != tail) {
		struct page_req_dsc *req;

		req = &iommu->prq[head / sizeof(*req)];
		if (!req->pasid_present || req->pasid != pasid) {
			head = (head + sizeof(*req)) & PRQ_RING_MASK;
			continue;
		}

		wait_for_completion(&iommu->prq_complete);
		goto prq_retry;
	}

	/*
	 * A work in IO page fault workqueue may try to lock pasid_mutex now.
	 * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for
	 * all works in the workqueue to finish may cause deadlock.
	 *
	 * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev().
	 * Unlock it to allow the works to be handled while waiting for
	 * them to finish.
	 */
	lockdep_assert_held(&pasid_mutex);
	mutex_unlock(&pasid_mutex);
	iopf_queue_flush_dev(dev);
	mutex_lock(&pasid_mutex);

	/*
	 * Perform steps described in VT-d spec CH7.10 to drain page
	 * requests and responses in hardware.
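	 *
	 * desc[0] is a fenced invalidation wait, desc[1] a PASID-based
	 * IOTLB invalidation and desc[2] a PASID-based device-TLB
	 * invalidation; qi_submit_sync() is asked to wait with the drain
	 * option so outstanding page requests and responses are flushed.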
	 */
	memset(desc, 0, sizeof(desc));
	desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) |
			QI_IWD_FENCE |
			QI_IWD_TYPE;
	desc[1].qw0 = QI_EIOTLB_PASID(pasid) |
			QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) |
			QI_EIOTLB_TYPE;
	desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) |
			QI_DEV_EIOTLB_SID(sid) |
			QI_DEV_EIOTLB_QDEP(qdep) |
			QI_DEIOTLB_TYPE |
			QI_DEV_IOTLB_PFSID(info->pfsid);
qi_retry:
	reinit_completion(&iommu->prq_complete);
	qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN);
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		wait_for_completion(&iommu->prq_complete);
		goto qi_retry;
	}
}

static int prq_to_iommu_prot(struct page_req_dsc *req)
{
	int prot = 0;

	if (req->rd_req)
		prot |= IOMMU_FAULT_PERM_READ;
	if (req->wr_req)
		prot |= IOMMU_FAULT_PERM_WRITE;
	if (req->exe_req)
		prot |= IOMMU_FAULT_PERM_EXEC;
	if (req->pm_req)
		prot |= IOMMU_FAULT_PERM_PRIV;

	return prot;
}

static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev,
				struct page_req_dsc *desc)
{
	struct iommu_fault_event event;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	/* Fill in event data for device specific processing */
	memset(&event, 0, sizeof(struct iommu_fault_event));
	event.fault.type = IOMMU_FAULT_PAGE_REQ;
	event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT;
	event.fault.prm.pasid = desc->pasid;
	event.fault.prm.grpid = desc->prg_index;
	event.fault.prm.perm = prq_to_iommu_prot(desc);

	if (desc->lpig)
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
	if (desc->pasid_present) {
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID;
	}
	if (desc->priv_data_present) {
		/*
		 * Set the last page in group bit if private data is present,
		 * because a page group response is required just as it is
		 * for LPIG. iommu_report_device_fault() doesn't understand
		 * this vendor specific requirement, thus we set last_page as
		 * a workaround.
		 */
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;
		event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
		event.fault.prm.private_data[0] = desc->priv_data[0];
		event.fault.prm.private_data[1] = desc->priv_data[1];
	} else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) {
		/*
		 * If the private data fields are not used by hardware, use
		 * them to monitor the prq handling latency.
		 */
		event.fault.prm.private_data[0] = ktime_to_ns(ktime_get());
	}

	return iommu_report_device_fault(dev, &event);
}

static void handle_bad_prq_event(struct intel_iommu *iommu,
				 struct page_req_dsc *req, int result)
{
	struct qi_desc desc;

	pr_err("%s: Invalid page request: %08llx %08llx\n",
	       iommu->name, ((unsigned long long *)req)[0],
	       ((unsigned long long *)req)[1]);

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must
	 * respond with page group response if private data
	 * is present (PDP) or last page in group (LPIG) bit
	 * is set. This is an additional VT-d feature beyond
	 * PCI ATS spec.
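	 *
	 * The response built below echoes the PRG index and any private
	 * data from the faulting request and carries the failure code in
	 * the QI_PGRP_RESP_CODE() field.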
	 */
	if (!req->lpig && !req->priv_data_present)
		return;

	desc.qw0 = QI_PGRP_PASID(req->pasid) |
			QI_PGRP_DID(req->rid) |
			QI_PGRP_PASID_P(req->pasid_present) |
			QI_PGRP_PDP(req->priv_data_present) |
			QI_PGRP_RESP_CODE(result) |
			QI_PGRP_RESP_TYPE;
	desc.qw1 = QI_PGRP_IDX(req->prg_index) |
			QI_PGRP_LPIG(req->lpig);

	if (req->priv_data_present) {
		desc.qw2 = req->priv_data[0];
		desc.qw3 = req->priv_data[1];
	} else {
		desc.qw2 = 0;
		desc.qw3 = 0;
	}

	qi_submit_sync(iommu, &desc, 1, 0);
}

static irqreturn_t prq_event_thread(int irq, void *d)
{
	struct intel_iommu *iommu = d;
	struct page_req_dsc *req;
	int head, tail, handled;
	struct pci_dev *pdev;
	u64 address;

	/*
	 * Clear PPR bit before reading head/tail registers, to ensure that
	 * we get a new interrupt if needed.
	 */
	writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG);

	tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
	head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
	handled = (head != tail);
	while (head != tail) {
		req = &iommu->prq[head / sizeof(*req)];
		address = (u64)req->addr << VTD_PAGE_SHIFT;

		if (unlikely(!req->pasid_present)) {
			pr_err("IOMMU: %s: Page request without PASID\n",
			       iommu->name);
bad_req:
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
			goto prq_advance;
		}

		if (unlikely(!is_canonical_address(address))) {
			pr_err("IOMMU: %s: Address is not canonical\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) {
			pr_err("IOMMU: %s: Page request in Privilege Mode\n",
			       iommu->name);
			goto bad_req;
		}

		if (unlikely(req->exe_req && req->rd_req)) {
			pr_err("IOMMU: %s: Execution request not supported\n",
			       iommu->name);
			goto bad_req;
		}

		/* Drop Stop Marker message. No need for a response. */
		if (unlikely(req->lpig && !req->rd_req && !req->wr_req))
			goto prq_advance;

		pdev = pci_get_domain_bus_and_slot(iommu->segment,
						   PCI_BUS_NUM(req->rid),
						   req->rid & 0xff);
		/*
		 * If the prq is to be handled outside the iommu driver via a
		 * receiver of the fault notifiers, we skip the page response
		 * here.
		 */
		if (!pdev)
			goto bad_req;

		if (intel_svm_prq_report(iommu, &pdev->dev, req))
			handle_bad_prq_event(iommu, req, QI_RESP_INVALID);
		else
			trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1,
					 req->priv_data[0], req->priv_data[1],
					 iommu->prq_seq_number++);
		pci_dev_put(pdev);
prq_advance:
		head = (head + sizeof(*req)) & PRQ_RING_MASK;
	}

	dmar_writeq(iommu->reg + DMAR_PQH_REG, tail);

	/*
	 * Clear the page request overflow bit and wake up all threads that
	 * are waiting for the completion of this handling.
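	 *
	 * The overflow condition is cleared only after software has caught
	 * up (head == tail); partially collected fault groups are dropped
	 * from the iopf queue first because the hardware may have discarded
	 * later requests in those groups.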
	 */
	if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) {
		pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n",
				    iommu->name);
		head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK;
		tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK;
		if (head == tail) {
			iopf_queue_discard_partial(iommu->iopf_queue);
			writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG);
			pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared",
					    iommu->name);
		}
	}

	if (!completion_done(&iommu->prq_complete))
		complete(&iommu->prq_complete);

	return IRQ_RETVAL(handled);
}

struct iommu_sva *intel_svm_bind(struct device *dev, struct mm_struct *mm, void *drvdata)
{
	struct intel_iommu *iommu = device_to_iommu(dev, NULL, NULL);
	unsigned int flags = 0;
	struct iommu_sva *sva;
	int ret;

	if (drvdata)
		flags = *(unsigned int *)drvdata;

	if (flags & SVM_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			dev_err(dev, "%s: Supervisor PASID not supported\n",
				iommu->name);
			return ERR_PTR(-EOPNOTSUPP);
		}

		if (mm) {
			dev_err(dev, "%s: Supervisor PASID with user provided mm\n",
				iommu->name);
			return ERR_PTR(-EINVAL);
		}

		mm = &init_mm;
	}

	mutex_lock(&pasid_mutex);
	ret = intel_svm_alloc_pasid(dev, mm, flags);
	if (ret) {
		mutex_unlock(&pasid_mutex);
		return ERR_PTR(ret);
	}

	sva = intel_svm_bind_mm(iommu, dev, mm, flags);
	mutex_unlock(&pasid_mutex);

	return sva;
}

void intel_svm_unbind(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev = to_intel_svm_dev(sva);

	mutex_lock(&pasid_mutex);
	intel_svm_unbind_mm(sdev->dev, sdev->pasid);
	mutex_unlock(&pasid_mutex);
}

u32 intel_svm_get_pasid(struct iommu_sva *sva)
{
	struct intel_svm_dev *sdev;
	u32 pasid;

	mutex_lock(&pasid_mutex);
	sdev = to_intel_svm_dev(sva);
	pasid = sdev->pasid;
	mutex_unlock(&pasid_mutex);

	return pasid;
}

int intel_svm_page_response(struct device *dev,
			    struct iommu_fault_event *evt,
			    struct iommu_page_response *msg)
{
	struct iommu_fault_page_request *prm;
	struct intel_iommu *iommu;
	bool private_present;
	bool pasid_present;
	bool last_page;
	u8 bus, devfn;
	int ret = 0;
	u16 sid;

	if (!dev || !dev_is_pci(dev))
		return -ENODEV;

	iommu = device_to_iommu(dev, &bus, &devfn);
	if (!iommu)
		return -ENODEV;

	if (!msg || !evt)
		return -EINVAL;

	prm = &evt->fault.prm;
	sid = PCI_DEVID(bus, devfn);
	pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID;
	private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA;
	last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE;

	if (!pasid_present) {
		ret = -EINVAL;
		goto out;
	}

	if (prm->pasid == 0 || prm->pasid >= PASID_MAX) {
		ret = -EINVAL;
		goto out;
	}

	/*
	 * Per VT-d spec. v3.0 ch7.7, system software must respond
	 * with page group response if private data is present (PDP)
	 * or last page in group (LPIG) bit is set. This is an
	 * additional VT-d requirement beyond PCI ATS spec.
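	 *
	 * When the private data only carries the timestamp that
	 * intel_svm_prq_report() stashed for latency tracking, it is
	 * consumed below to update the PRQ latency statistics rather
	 * than being echoed back to the device.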
	 */
	if (last_page || private_present) {
		struct qi_desc desc;

		desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) |
				QI_PGRP_PASID_P(pasid_present) |
				QI_PGRP_PDP(private_present) |
				QI_PGRP_RESP_CODE(msg->code) |
				QI_PGRP_RESP_TYPE;
		desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page);
		desc.qw2 = 0;
		desc.qw3 = 0;

		if (private_present) {
			desc.qw2 = prm->private_data[0];
			desc.qw3 = prm->private_data[1];
		} else if (prm->private_data[0]) {
			dmar_latency_update(iommu, DMAR_LATENCY_PRQ,
				ktime_to_ns(ktime_get()) - prm->private_data[0]);
		}

		qi_submit_sync(iommu, &desc, 1, 0);
	}
out:
	return ret;
}
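
/*
 * Illustrative only (not part of the driver): a minimal sketch of how a
 * hypothetical PCIe device driver would reach the bind/unbind paths above
 * through the generic SVA API rather than calling intel_svm_bind()
 * directly. The point at which the PASID is programmed into the device is
 * an assumption about that hypothetical device.
 *
 *	struct iommu_sva *handle;
 *	u32 pasid;
 *
 *	if (iommu_dev_enable_feature(dev, IOMMU_DEV_FEAT_SVA))
 *		return -ENODEV;
 *
 *	handle = iommu_sva_bind_device(dev, current->mm, NULL);
 *	if (IS_ERR(handle))
 *		return PTR_ERR(handle);
 *
 *	pasid = iommu_sva_get_pasid(handle);
 *	// ... program 'pasid' into the device and start DMA ...
 *
 *	iommu_sva_unbind_device(handle);
 *	iommu_dev_disable_feature(dev, IOMMU_DEV_FEAT_SVA);
 */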