1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright © 2015 Intel Corporation. 4 * 5 * Authors: David Woodhouse <dwmw2@infradead.org> 6 */ 7 8 #include <linux/mmu_notifier.h> 9 #include <linux/sched.h> 10 #include <linux/sched/mm.h> 11 #include <linux/slab.h> 12 #include <linux/rculist.h> 13 #include <linux/pci.h> 14 #include <linux/pci-ats.h> 15 #include <linux/dmar.h> 16 #include <linux/interrupt.h> 17 #include <linux/mm_types.h> 18 #include <linux/xarray.h> 19 #include <asm/page.h> 20 #include <asm/fpu/api.h> 21 22 #include "iommu.h" 23 #include "pasid.h" 24 #include "perf.h" 25 #include "../iommu-sva.h" 26 #include "trace.h" 27 28 static irqreturn_t prq_event_thread(int irq, void *d); 29 static void intel_svm_drain_prq(struct device *dev, u32 pasid); 30 #define to_intel_svm_dev(handle) container_of(handle, struct intel_svm_dev, sva) 31 32 static DEFINE_XARRAY_ALLOC(pasid_private_array); 33 static int pasid_private_add(ioasid_t pasid, void *priv) 34 { 35 return xa_alloc(&pasid_private_array, &pasid, priv, 36 XA_LIMIT(pasid, pasid), GFP_ATOMIC); 37 } 38 39 static void pasid_private_remove(ioasid_t pasid) 40 { 41 xa_erase(&pasid_private_array, pasid); 42 } 43 44 static void *pasid_private_find(ioasid_t pasid) 45 { 46 return xa_load(&pasid_private_array, pasid); 47 } 48 49 static struct intel_svm_dev * 50 svm_lookup_device_by_dev(struct intel_svm *svm, struct device *dev) 51 { 52 struct intel_svm_dev *sdev = NULL, *t; 53 54 rcu_read_lock(); 55 list_for_each_entry_rcu(t, &svm->devs, list) { 56 if (t->dev == dev) { 57 sdev = t; 58 break; 59 } 60 } 61 rcu_read_unlock(); 62 63 return sdev; 64 } 65 66 int intel_svm_enable_prq(struct intel_iommu *iommu) 67 { 68 struct iopf_queue *iopfq; 69 struct page *pages; 70 int irq, ret; 71 72 pages = alloc_pages(GFP_KERNEL | __GFP_ZERO, PRQ_ORDER); 73 if (!pages) { 74 pr_warn("IOMMU: %s: Failed to allocate page request queue\n", 75 iommu->name); 76 return -ENOMEM; 77 } 78 iommu->prq = page_address(pages); 79 80 irq = dmar_alloc_hwirq(IOMMU_IRQ_ID_OFFSET_PRQ + iommu->seq_id, iommu->node, iommu); 81 if (irq <= 0) { 82 pr_err("IOMMU: %s: Failed to create IRQ vector for page request queue\n", 83 iommu->name); 84 ret = -EINVAL; 85 goto free_prq; 86 } 87 iommu->pr_irq = irq; 88 89 snprintf(iommu->iopfq_name, sizeof(iommu->iopfq_name), 90 "dmar%d-iopfq", iommu->seq_id); 91 iopfq = iopf_queue_alloc(iommu->iopfq_name); 92 if (!iopfq) { 93 pr_err("IOMMU: %s: Failed to allocate iopf queue\n", iommu->name); 94 ret = -ENOMEM; 95 goto free_hwirq; 96 } 97 iommu->iopf_queue = iopfq; 98 99 snprintf(iommu->prq_name, sizeof(iommu->prq_name), "dmar%d-prq", iommu->seq_id); 100 101 ret = request_threaded_irq(irq, NULL, prq_event_thread, IRQF_ONESHOT, 102 iommu->prq_name, iommu); 103 if (ret) { 104 pr_err("IOMMU: %s: Failed to request IRQ for page request queue\n", 105 iommu->name); 106 goto free_iopfq; 107 } 108 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); 109 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); 110 dmar_writeq(iommu->reg + DMAR_PQA_REG, virt_to_phys(iommu->prq) | PRQ_ORDER); 111 112 init_completion(&iommu->prq_complete); 113 114 return 0; 115 116 free_iopfq: 117 iopf_queue_free(iommu->iopf_queue); 118 iommu->iopf_queue = NULL; 119 free_hwirq: 120 dmar_free_hwirq(irq); 121 iommu->pr_irq = 0; 122 free_prq: 123 free_pages((unsigned long)iommu->prq, PRQ_ORDER); 124 iommu->prq = NULL; 125 126 return ret; 127 } 128 129 int intel_svm_finish_prq(struct intel_iommu *iommu) 130 { 131 dmar_writeq(iommu->reg + DMAR_PQH_REG, 0ULL); 132 dmar_writeq(iommu->reg + DMAR_PQT_REG, 0ULL); 133 dmar_writeq(iommu->reg + DMAR_PQA_REG, 0ULL); 134 135 if (iommu->pr_irq) { 136 free_irq(iommu->pr_irq, iommu); 137 dmar_free_hwirq(iommu->pr_irq); 138 iommu->pr_irq = 0; 139 } 140 141 if (iommu->iopf_queue) { 142 iopf_queue_free(iommu->iopf_queue); 143 iommu->iopf_queue = NULL; 144 } 145 146 free_pages((unsigned long)iommu->prq, PRQ_ORDER); 147 iommu->prq = NULL; 148 149 return 0; 150 } 151 152 void intel_svm_check(struct intel_iommu *iommu) 153 { 154 if (!pasid_supported(iommu)) 155 return; 156 157 if (cpu_feature_enabled(X86_FEATURE_GBPAGES) && 158 !cap_fl1gp_support(iommu->cap)) { 159 pr_err("%s SVM disabled, incompatible 1GB page capability\n", 160 iommu->name); 161 return; 162 } 163 164 if (cpu_feature_enabled(X86_FEATURE_LA57) && 165 !cap_fl5lp_support(iommu->cap)) { 166 pr_err("%s SVM disabled, incompatible paging mode\n", 167 iommu->name); 168 return; 169 } 170 171 iommu->flags |= VTD_FLAG_SVM_CAPABLE; 172 } 173 174 static void __flush_svm_range_dev(struct intel_svm *svm, 175 struct intel_svm_dev *sdev, 176 unsigned long address, 177 unsigned long pages, int ih) 178 { 179 struct device_domain_info *info = dev_iommu_priv_get(sdev->dev); 180 181 if (WARN_ON(!pages)) 182 return; 183 184 qi_flush_piotlb(sdev->iommu, sdev->did, svm->pasid, address, pages, ih); 185 if (info->ats_enabled) { 186 qi_flush_dev_iotlb_pasid(sdev->iommu, sdev->sid, info->pfsid, 187 svm->pasid, sdev->qdep, address, 188 order_base_2(pages)); 189 quirk_extra_dev_tlb_flush(info, address, order_base_2(pages), 190 svm->pasid, sdev->qdep); 191 } 192 } 193 194 static void intel_flush_svm_range_dev(struct intel_svm *svm, 195 struct intel_svm_dev *sdev, 196 unsigned long address, 197 unsigned long pages, int ih) 198 { 199 unsigned long shift = ilog2(__roundup_pow_of_two(pages)); 200 unsigned long align = (1ULL << (VTD_PAGE_SHIFT + shift)); 201 unsigned long start = ALIGN_DOWN(address, align); 202 unsigned long end = ALIGN(address + (pages << VTD_PAGE_SHIFT), align); 203 204 while (start < end) { 205 __flush_svm_range_dev(svm, sdev, start, align >> VTD_PAGE_SHIFT, ih); 206 start += align; 207 } 208 } 209 210 static void intel_flush_svm_range(struct intel_svm *svm, unsigned long address, 211 unsigned long pages, int ih) 212 { 213 struct intel_svm_dev *sdev; 214 215 rcu_read_lock(); 216 list_for_each_entry_rcu(sdev, &svm->devs, list) 217 intel_flush_svm_range_dev(svm, sdev, address, pages, ih); 218 rcu_read_unlock(); 219 } 220 221 /* Pages have been freed at this point */ 222 static void intel_arch_invalidate_secondary_tlbs(struct mmu_notifier *mn, 223 struct mm_struct *mm, 224 unsigned long start, unsigned long end) 225 { 226 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); 227 228 intel_flush_svm_range(svm, start, 229 (end - start + PAGE_SIZE - 1) >> VTD_PAGE_SHIFT, 0); 230 } 231 232 static void intel_mm_release(struct mmu_notifier *mn, struct mm_struct *mm) 233 { 234 struct intel_svm *svm = container_of(mn, struct intel_svm, notifier); 235 struct intel_svm_dev *sdev; 236 237 /* This might end up being called from exit_mmap(), *before* the page 238 * tables are cleared. And __mmu_notifier_release() will delete us from 239 * the list of notifiers so that our invalidate_range() callback doesn't 240 * get called when the page tables are cleared. So we need to protect 241 * against hardware accessing those page tables. 242 * 243 * We do it by clearing the entry in the PASID table and then flushing 244 * the IOTLB and the PASID table caches. This might upset hardware; 245 * perhaps we'll want to point the PASID to a dummy PGD (like the zero 246 * page) so that we end up taking a fault that the hardware really 247 * *has* to handle gracefully without affecting other processes. 248 */ 249 rcu_read_lock(); 250 list_for_each_entry_rcu(sdev, &svm->devs, list) 251 intel_pasid_tear_down_entry(sdev->iommu, sdev->dev, 252 svm->pasid, true); 253 rcu_read_unlock(); 254 255 } 256 257 static const struct mmu_notifier_ops intel_mmuops = { 258 .release = intel_mm_release, 259 .arch_invalidate_secondary_tlbs = intel_arch_invalidate_secondary_tlbs, 260 }; 261 262 static DEFINE_MUTEX(pasid_mutex); 263 264 static int pasid_to_svm_sdev(struct device *dev, unsigned int pasid, 265 struct intel_svm **rsvm, 266 struct intel_svm_dev **rsdev) 267 { 268 struct intel_svm_dev *sdev = NULL; 269 struct intel_svm *svm; 270 271 /* The caller should hold the pasid_mutex lock */ 272 if (WARN_ON(!mutex_is_locked(&pasid_mutex))) 273 return -EINVAL; 274 275 if (pasid == IOMMU_PASID_INVALID || pasid >= PASID_MAX) 276 return -EINVAL; 277 278 svm = pasid_private_find(pasid); 279 if (IS_ERR(svm)) 280 return PTR_ERR(svm); 281 282 if (!svm) 283 goto out; 284 285 /* 286 * If we found svm for the PASID, there must be at least one device 287 * bond. 288 */ 289 if (WARN_ON(list_empty(&svm->devs))) 290 return -EINVAL; 291 sdev = svm_lookup_device_by_dev(svm, dev); 292 293 out: 294 *rsvm = svm; 295 *rsdev = sdev; 296 297 return 0; 298 } 299 300 static int intel_svm_bind_mm(struct intel_iommu *iommu, struct device *dev, 301 struct mm_struct *mm) 302 { 303 struct device_domain_info *info = dev_iommu_priv_get(dev); 304 struct intel_svm_dev *sdev; 305 struct intel_svm *svm; 306 unsigned long sflags; 307 int ret = 0; 308 309 svm = pasid_private_find(mm->pasid); 310 if (!svm) { 311 svm = kzalloc(sizeof(*svm), GFP_KERNEL); 312 if (!svm) 313 return -ENOMEM; 314 315 svm->pasid = mm->pasid; 316 svm->mm = mm; 317 INIT_LIST_HEAD_RCU(&svm->devs); 318 319 svm->notifier.ops = &intel_mmuops; 320 ret = mmu_notifier_register(&svm->notifier, mm); 321 if (ret) { 322 kfree(svm); 323 return ret; 324 } 325 326 ret = pasid_private_add(svm->pasid, svm); 327 if (ret) { 328 mmu_notifier_unregister(&svm->notifier, mm); 329 kfree(svm); 330 return ret; 331 } 332 } 333 334 sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); 335 if (!sdev) { 336 ret = -ENOMEM; 337 goto free_svm; 338 } 339 340 sdev->dev = dev; 341 sdev->iommu = iommu; 342 sdev->did = FLPT_DEFAULT_DID; 343 sdev->sid = PCI_DEVID(info->bus, info->devfn); 344 init_rcu_head(&sdev->rcu); 345 if (info->ats_enabled) { 346 sdev->qdep = info->ats_qdep; 347 if (sdev->qdep >= QI_DEV_EIOTLB_MAX_INVS) 348 sdev->qdep = 0; 349 } 350 351 /* Setup the pasid table: */ 352 sflags = cpu_feature_enabled(X86_FEATURE_LA57) ? PASID_FLAG_FL5LP : 0; 353 ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd, mm->pasid, 354 FLPT_DEFAULT_DID, sflags); 355 if (ret) 356 goto free_sdev; 357 358 list_add_rcu(&sdev->list, &svm->devs); 359 360 return 0; 361 362 free_sdev: 363 kfree(sdev); 364 free_svm: 365 if (list_empty(&svm->devs)) { 366 mmu_notifier_unregister(&svm->notifier, mm); 367 pasid_private_remove(mm->pasid); 368 kfree(svm); 369 } 370 371 return ret; 372 } 373 374 /* Caller must hold pasid_mutex */ 375 static int intel_svm_unbind_mm(struct device *dev, u32 pasid) 376 { 377 struct intel_svm_dev *sdev; 378 struct intel_iommu *iommu; 379 struct intel_svm *svm; 380 struct mm_struct *mm; 381 int ret = -EINVAL; 382 383 iommu = device_to_iommu(dev, NULL, NULL); 384 if (!iommu) 385 goto out; 386 387 ret = pasid_to_svm_sdev(dev, pasid, &svm, &sdev); 388 if (ret) 389 goto out; 390 mm = svm->mm; 391 392 if (sdev) { 393 list_del_rcu(&sdev->list); 394 /* 395 * Flush the PASID cache and IOTLB for this device. 396 * Note that we do depend on the hardware *not* using 397 * the PASID any more. Just as we depend on other 398 * devices never using PASIDs that they have no right 399 * to use. We have a *shared* PASID table, because it's 400 * large and has to be physically contiguous. So it's 401 * hard to be as defensive as we might like. 402 */ 403 intel_pasid_tear_down_entry(iommu, dev, svm->pasid, false); 404 intel_svm_drain_prq(dev, svm->pasid); 405 kfree_rcu(sdev, rcu); 406 407 if (list_empty(&svm->devs)) { 408 if (svm->notifier.ops) 409 mmu_notifier_unregister(&svm->notifier, mm); 410 pasid_private_remove(svm->pasid); 411 /* 412 * We mandate that no page faults may be outstanding 413 * for the PASID when intel_svm_unbind_mm() is called. 414 * If that is not obeyed, subtle errors will happen. 415 * Let's make them less subtle... 416 */ 417 memset(svm, 0x6b, sizeof(*svm)); 418 kfree(svm); 419 } 420 } 421 out: 422 return ret; 423 } 424 425 /* Page request queue descriptor */ 426 struct page_req_dsc { 427 union { 428 struct { 429 u64 type:8; 430 u64 pasid_present:1; 431 u64 priv_data_present:1; 432 u64 rsvd:6; 433 u64 rid:16; 434 u64 pasid:20; 435 u64 exe_req:1; 436 u64 pm_req:1; 437 u64 rsvd2:10; 438 }; 439 u64 qw_0; 440 }; 441 union { 442 struct { 443 u64 rd_req:1; 444 u64 wr_req:1; 445 u64 lpig:1; 446 u64 prg_index:9; 447 u64 addr:52; 448 }; 449 u64 qw_1; 450 }; 451 u64 priv_data[2]; 452 }; 453 454 static bool is_canonical_address(u64 addr) 455 { 456 int shift = 64 - (__VIRTUAL_MASK_SHIFT + 1); 457 long saddr = (long) addr; 458 459 return (((saddr << shift) >> shift) == saddr); 460 } 461 462 /** 463 * intel_svm_drain_prq - Drain page requests and responses for a pasid 464 * @dev: target device 465 * @pasid: pasid for draining 466 * 467 * Drain all pending page requests and responses related to @pasid in both 468 * software and hardware. This is supposed to be called after the device 469 * driver has stopped DMA, the pasid entry has been cleared, and both IOTLB 470 * and DevTLB have been invalidated. 471 * 472 * It waits until all pending page requests for @pasid in the page fault 473 * queue are completed by the prq handling thread. Then follow the steps 474 * described in VT-d spec CH7.10 to drain all page requests and page 475 * responses pending in the hardware. 476 */ 477 static void intel_svm_drain_prq(struct device *dev, u32 pasid) 478 { 479 struct device_domain_info *info; 480 struct dmar_domain *domain; 481 struct intel_iommu *iommu; 482 struct qi_desc desc[3]; 483 struct pci_dev *pdev; 484 int head, tail; 485 u16 sid, did; 486 int qdep; 487 488 info = dev_iommu_priv_get(dev); 489 if (WARN_ON(!info || !dev_is_pci(dev))) 490 return; 491 492 if (!info->pri_enabled) 493 return; 494 495 iommu = info->iommu; 496 domain = info->domain; 497 pdev = to_pci_dev(dev); 498 sid = PCI_DEVID(info->bus, info->devfn); 499 did = domain_id_iommu(domain, iommu); 500 qdep = pci_ats_queue_depth(pdev); 501 502 /* 503 * Check and wait until all pending page requests in the queue are 504 * handled by the prq handling thread. 505 */ 506 prq_retry: 507 reinit_completion(&iommu->prq_complete); 508 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; 509 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; 510 while (head != tail) { 511 struct page_req_dsc *req; 512 513 req = &iommu->prq[head / sizeof(*req)]; 514 if (!req->pasid_present || req->pasid != pasid) { 515 head = (head + sizeof(*req)) & PRQ_RING_MASK; 516 continue; 517 } 518 519 wait_for_completion(&iommu->prq_complete); 520 goto prq_retry; 521 } 522 523 /* 524 * A work in IO page fault workqueue may try to lock pasid_mutex now. 525 * Holding pasid_mutex while waiting in iopf_queue_flush_dev() for 526 * all works in the workqueue to finish may cause deadlock. 527 * 528 * It's unnecessary to hold pasid_mutex in iopf_queue_flush_dev(). 529 * Unlock it to allow the works to be handled while waiting for 530 * them to finish. 531 */ 532 lockdep_assert_held(&pasid_mutex); 533 mutex_unlock(&pasid_mutex); 534 iopf_queue_flush_dev(dev); 535 mutex_lock(&pasid_mutex); 536 537 /* 538 * Perform steps described in VT-d spec CH7.10 to drain page 539 * requests and responses in hardware. 540 */ 541 memset(desc, 0, sizeof(desc)); 542 desc[0].qw0 = QI_IWD_STATUS_DATA(QI_DONE) | 543 QI_IWD_FENCE | 544 QI_IWD_TYPE; 545 desc[1].qw0 = QI_EIOTLB_PASID(pasid) | 546 QI_EIOTLB_DID(did) | 547 QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | 548 QI_EIOTLB_TYPE; 549 desc[2].qw0 = QI_DEV_EIOTLB_PASID(pasid) | 550 QI_DEV_EIOTLB_SID(sid) | 551 QI_DEV_EIOTLB_QDEP(qdep) | 552 QI_DEIOTLB_TYPE | 553 QI_DEV_IOTLB_PFSID(info->pfsid); 554 qi_retry: 555 reinit_completion(&iommu->prq_complete); 556 qi_submit_sync(iommu, desc, 3, QI_OPT_WAIT_DRAIN); 557 if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { 558 wait_for_completion(&iommu->prq_complete); 559 goto qi_retry; 560 } 561 } 562 563 static int prq_to_iommu_prot(struct page_req_dsc *req) 564 { 565 int prot = 0; 566 567 if (req->rd_req) 568 prot |= IOMMU_FAULT_PERM_READ; 569 if (req->wr_req) 570 prot |= IOMMU_FAULT_PERM_WRITE; 571 if (req->exe_req) 572 prot |= IOMMU_FAULT_PERM_EXEC; 573 if (req->pm_req) 574 prot |= IOMMU_FAULT_PERM_PRIV; 575 576 return prot; 577 } 578 579 static int intel_svm_prq_report(struct intel_iommu *iommu, struct device *dev, 580 struct page_req_dsc *desc) 581 { 582 struct iommu_fault_event event; 583 584 if (!dev || !dev_is_pci(dev)) 585 return -ENODEV; 586 587 /* Fill in event data for device specific processing */ 588 memset(&event, 0, sizeof(struct iommu_fault_event)); 589 event.fault.type = IOMMU_FAULT_PAGE_REQ; 590 event.fault.prm.addr = (u64)desc->addr << VTD_PAGE_SHIFT; 591 event.fault.prm.pasid = desc->pasid; 592 event.fault.prm.grpid = desc->prg_index; 593 event.fault.prm.perm = prq_to_iommu_prot(desc); 594 595 if (desc->lpig) 596 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; 597 if (desc->pasid_present) { 598 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; 599 event.fault.prm.flags |= IOMMU_FAULT_PAGE_RESPONSE_NEEDS_PASID; 600 } 601 if (desc->priv_data_present) { 602 /* 603 * Set last page in group bit if private data is present, 604 * page response is required as it does for LPIG. 605 * iommu_report_device_fault() doesn't understand this vendor 606 * specific requirement thus we set last_page as a workaround. 607 */ 608 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; 609 event.fault.prm.flags |= IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; 610 event.fault.prm.private_data[0] = desc->priv_data[0]; 611 event.fault.prm.private_data[1] = desc->priv_data[1]; 612 } else if (dmar_latency_enabled(iommu, DMAR_LATENCY_PRQ)) { 613 /* 614 * If the private data fields are not used by hardware, use it 615 * to monitor the prq handle latency. 616 */ 617 event.fault.prm.private_data[0] = ktime_to_ns(ktime_get()); 618 } 619 620 return iommu_report_device_fault(dev, &event); 621 } 622 623 static void handle_bad_prq_event(struct intel_iommu *iommu, 624 struct page_req_dsc *req, int result) 625 { 626 struct qi_desc desc; 627 628 pr_err("%s: Invalid page request: %08llx %08llx\n", 629 iommu->name, ((unsigned long long *)req)[0], 630 ((unsigned long long *)req)[1]); 631 632 /* 633 * Per VT-d spec. v3.0 ch7.7, system software must 634 * respond with page group response if private data 635 * is present (PDP) or last page in group (LPIG) bit 636 * is set. This is an additional VT-d feature beyond 637 * PCI ATS spec. 638 */ 639 if (!req->lpig && !req->priv_data_present) 640 return; 641 642 desc.qw0 = QI_PGRP_PASID(req->pasid) | 643 QI_PGRP_DID(req->rid) | 644 QI_PGRP_PASID_P(req->pasid_present) | 645 QI_PGRP_PDP(req->priv_data_present) | 646 QI_PGRP_RESP_CODE(result) | 647 QI_PGRP_RESP_TYPE; 648 desc.qw1 = QI_PGRP_IDX(req->prg_index) | 649 QI_PGRP_LPIG(req->lpig); 650 651 if (req->priv_data_present) { 652 desc.qw2 = req->priv_data[0]; 653 desc.qw3 = req->priv_data[1]; 654 } else { 655 desc.qw2 = 0; 656 desc.qw3 = 0; 657 } 658 659 qi_submit_sync(iommu, &desc, 1, 0); 660 } 661 662 static irqreturn_t prq_event_thread(int irq, void *d) 663 { 664 struct intel_iommu *iommu = d; 665 struct page_req_dsc *req; 666 int head, tail, handled; 667 struct pci_dev *pdev; 668 u64 address; 669 670 /* 671 * Clear PPR bit before reading head/tail registers, to ensure that 672 * we get a new interrupt if needed. 673 */ 674 writel(DMA_PRS_PPR, iommu->reg + DMAR_PRS_REG); 675 676 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; 677 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; 678 handled = (head != tail); 679 while (head != tail) { 680 req = &iommu->prq[head / sizeof(*req)]; 681 address = (u64)req->addr << VTD_PAGE_SHIFT; 682 683 if (unlikely(!req->pasid_present)) { 684 pr_err("IOMMU: %s: Page request without PASID\n", 685 iommu->name); 686 bad_req: 687 handle_bad_prq_event(iommu, req, QI_RESP_INVALID); 688 goto prq_advance; 689 } 690 691 if (unlikely(!is_canonical_address(address))) { 692 pr_err("IOMMU: %s: Address is not canonical\n", 693 iommu->name); 694 goto bad_req; 695 } 696 697 if (unlikely(req->pm_req && (req->rd_req | req->wr_req))) { 698 pr_err("IOMMU: %s: Page request in Privilege Mode\n", 699 iommu->name); 700 goto bad_req; 701 } 702 703 if (unlikely(req->exe_req && req->rd_req)) { 704 pr_err("IOMMU: %s: Execution request not supported\n", 705 iommu->name); 706 goto bad_req; 707 } 708 709 /* Drop Stop Marker message. No need for a response. */ 710 if (unlikely(req->lpig && !req->rd_req && !req->wr_req)) 711 goto prq_advance; 712 713 pdev = pci_get_domain_bus_and_slot(iommu->segment, 714 PCI_BUS_NUM(req->rid), 715 req->rid & 0xff); 716 /* 717 * If prq is to be handled outside iommu driver via receiver of 718 * the fault notifiers, we skip the page response here. 719 */ 720 if (!pdev) 721 goto bad_req; 722 723 if (intel_svm_prq_report(iommu, &pdev->dev, req)) 724 handle_bad_prq_event(iommu, req, QI_RESP_INVALID); 725 else 726 trace_prq_report(iommu, &pdev->dev, req->qw_0, req->qw_1, 727 req->priv_data[0], req->priv_data[1], 728 iommu->prq_seq_number++); 729 pci_dev_put(pdev); 730 prq_advance: 731 head = (head + sizeof(*req)) & PRQ_RING_MASK; 732 } 733 734 dmar_writeq(iommu->reg + DMAR_PQH_REG, tail); 735 736 /* 737 * Clear the page request overflow bit and wake up all threads that 738 * are waiting for the completion of this handling. 739 */ 740 if (readl(iommu->reg + DMAR_PRS_REG) & DMA_PRS_PRO) { 741 pr_info_ratelimited("IOMMU: %s: PRQ overflow detected\n", 742 iommu->name); 743 head = dmar_readq(iommu->reg + DMAR_PQH_REG) & PRQ_RING_MASK; 744 tail = dmar_readq(iommu->reg + DMAR_PQT_REG) & PRQ_RING_MASK; 745 if (head == tail) { 746 iopf_queue_discard_partial(iommu->iopf_queue); 747 writel(DMA_PRS_PRO, iommu->reg + DMAR_PRS_REG); 748 pr_info_ratelimited("IOMMU: %s: PRQ overflow cleared", 749 iommu->name); 750 } 751 } 752 753 if (!completion_done(&iommu->prq_complete)) 754 complete(&iommu->prq_complete); 755 756 return IRQ_RETVAL(handled); 757 } 758 759 int intel_svm_page_response(struct device *dev, 760 struct iommu_fault_event *evt, 761 struct iommu_page_response *msg) 762 { 763 struct iommu_fault_page_request *prm; 764 struct intel_iommu *iommu; 765 bool private_present; 766 bool pasid_present; 767 bool last_page; 768 u8 bus, devfn; 769 int ret = 0; 770 u16 sid; 771 772 if (!dev || !dev_is_pci(dev)) 773 return -ENODEV; 774 775 iommu = device_to_iommu(dev, &bus, &devfn); 776 if (!iommu) 777 return -ENODEV; 778 779 if (!msg || !evt) 780 return -EINVAL; 781 782 prm = &evt->fault.prm; 783 sid = PCI_DEVID(bus, devfn); 784 pasid_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PASID_VALID; 785 private_present = prm->flags & IOMMU_FAULT_PAGE_REQUEST_PRIV_DATA; 786 last_page = prm->flags & IOMMU_FAULT_PAGE_REQUEST_LAST_PAGE; 787 788 if (!pasid_present) { 789 ret = -EINVAL; 790 goto out; 791 } 792 793 if (prm->pasid == 0 || prm->pasid >= PASID_MAX) { 794 ret = -EINVAL; 795 goto out; 796 } 797 798 /* 799 * Per VT-d spec. v3.0 ch7.7, system software must respond 800 * with page group response if private data is present (PDP) 801 * or last page in group (LPIG) bit is set. This is an 802 * additional VT-d requirement beyond PCI ATS spec. 803 */ 804 if (last_page || private_present) { 805 struct qi_desc desc; 806 807 desc.qw0 = QI_PGRP_PASID(prm->pasid) | QI_PGRP_DID(sid) | 808 QI_PGRP_PASID_P(pasid_present) | 809 QI_PGRP_PDP(private_present) | 810 QI_PGRP_RESP_CODE(msg->code) | 811 QI_PGRP_RESP_TYPE; 812 desc.qw1 = QI_PGRP_IDX(prm->grpid) | QI_PGRP_LPIG(last_page); 813 desc.qw2 = 0; 814 desc.qw3 = 0; 815 816 if (private_present) { 817 desc.qw2 = prm->private_data[0]; 818 desc.qw3 = prm->private_data[1]; 819 } else if (prm->private_data[0]) { 820 dmar_latency_update(iommu, DMAR_LATENCY_PRQ, 821 ktime_to_ns(ktime_get()) - prm->private_data[0]); 822 } 823 824 qi_submit_sync(iommu, &desc, 1, 0); 825 } 826 out: 827 return ret; 828 } 829 830 void intel_svm_remove_dev_pasid(struct device *dev, ioasid_t pasid) 831 { 832 mutex_lock(&pasid_mutex); 833 intel_svm_unbind_mm(dev, pasid); 834 mutex_unlock(&pasid_mutex); 835 } 836 837 static int intel_svm_set_dev_pasid(struct iommu_domain *domain, 838 struct device *dev, ioasid_t pasid) 839 { 840 struct device_domain_info *info = dev_iommu_priv_get(dev); 841 struct intel_iommu *iommu = info->iommu; 842 struct mm_struct *mm = domain->mm; 843 int ret; 844 845 mutex_lock(&pasid_mutex); 846 ret = intel_svm_bind_mm(iommu, dev, mm); 847 mutex_unlock(&pasid_mutex); 848 849 return ret; 850 } 851 852 static void intel_svm_domain_free(struct iommu_domain *domain) 853 { 854 kfree(to_dmar_domain(domain)); 855 } 856 857 static const struct iommu_domain_ops intel_svm_domain_ops = { 858 .set_dev_pasid = intel_svm_set_dev_pasid, 859 .free = intel_svm_domain_free 860 }; 861 862 struct iommu_domain *intel_svm_domain_alloc(void) 863 { 864 struct dmar_domain *domain; 865 866 domain = kzalloc(sizeof(*domain), GFP_KERNEL); 867 if (!domain) 868 return NULL; 869 domain->domain.ops = &intel_svm_domain_ops; 870 871 return &domain->domain; 872 } 873