// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "pasid.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;

int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
{
	unsigned long flags;
	u8 status_code;
	int ret = 0;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		*pasid = VCMD_VRSP_RESULT_PASID(res);
		break;
	case VCMD_VRSP_SC_NO_PASID_AVAIL:
		pr_info("IOMMU: %s: No PASID available\n", iommu->name);
		ret = -ENOSPC;
		break;
	default:
		ret = -ENODEV;
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}

	return ret;
}

void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid)
{
	unsigned long flags;
	u8 status_code;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG,
		    VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		break;
	case VCMD_VRSP_SC_INVALID_PASID:
		pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
		break;
	default:
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}
}

/*
 * Per device pasid table management:
 */
static inline void
device_attach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = pasid_table;
	list_add(&info->table, &pasid_table->dev);
}

static inline void
device_detach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = NULL;
	list_del(&info->table);
}

struct pasid_table_opaque {
	struct pasid_table	**pasid_table;
	int			segment;
	int			bus;
	int			devfn;
};

static int search_pasid_table(struct device_domain_info *info, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	if (info->iommu->segment == data->segment &&
	    info->bus == data->bus &&
	    info->devfn == data->devfn &&
	    info->pasid_table) {
		*data->pasid_table = info->pasid_table;
		return 1;
	}

	return 0;
}

static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	data->segment = pci_domain_nr(pdev->bus);
	data->bus = PCI_BUS_NUM(alias);
	data->devfn = alias & 0xff;

	return for_each_device_domain(&search_pasid_table, data);
}
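
/*
 * Sizing sketch for the directory allocated below (a reading aid,
 * assuming the layout in this kernel's pasid.h: 8-byte directory
 * entries and PASID_PDE_SHIFT == 6, i.e. 64 PASIDs covered per
 * directory entry):
 *
 *	directory bytes = max_pasid / 64 * 8
 *			= max_pasid >> (PASID_PDE_SHIFT - 3)
 *	max_pasid	= 1 << (order + PAGE_SHIFT + 3)
 *
 * e.g. with 4KiB pages and order 0, one directory page holds
 * 4096 / 8 = 512 entries, each covering 64 PASIDs, so max_pasid is
 * 512 * 64 = 32768 = 1 << (0 + 12 + 3).
 */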

/*
 * Allocate a pasid table for @dev. It must be called in a
 * single-threaded context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_table_opaque data;
	struct page *pages;
	u32 max_pasid = 0;
	int ret, order;
	int size;

	might_sleep();
	info = get_domain_info(dev);
	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
		return -EINVAL;

	/* DMA alias device already has a pasid table, use it: */
	data.pasid_table = &pasid_table;
	ret = pci_for_each_dma_alias(to_pci_dev(dev),
				     &get_alias_pasid_table, &data);
	if (ret)
		goto attach_out;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;
	INIT_LIST_HEAD(&pasid_table->dev);

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);

attach_out:
	device_attach_pasid_table(info, pasid_table);

	return 0;
}

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = get_domain_info(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	device_detach_pasid_table(info, pasid_table);

	if (!list_empty(&pasid_table->dev))
		return;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = get_domain_info(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = get_domain_info(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = get_domain_info(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

	spin_lock(&pasid_lock);
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		entries = alloc_pgtable_page(info->iommu->node);
		if (!entries) {
			spin_unlock(&pasid_lock);
			return NULL;
		}

		WRITE_ONCE(dir[dir_index].val,
			   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
	}
	spin_unlock(&pasid_lock);

	return &entries[index];
}
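
/*
 * Illustrative lifecycle of the per-device table (a sketch only; the
 * real callers live in the device attach/release paths elsewhere in
 * this driver, and error handling is elided here):
 *
 *	ret = intel_pasid_alloc_table(dev);	 // directory page(s)
 *	pte = intel_pasid_get_entry(dev, pasid); // leaf table allocated
 *						 // on demand under
 *						 // pasid_lock
 *	...
 *	intel_pasid_free_table(dev);		 // frees the leaves, then
 *						 // the directory
 */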

/*
 * Interfaces for PASID table entry manipulation:
 */
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], 0);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], PASID_PTE_FPD);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 old;

	old = READ_ONCE(*ptr);
	WRITE_ONCE(*ptr, (old & ~mask) | bits);
}

/*
 * Set up the DID (Domain Identifier) field (bits 64-79) of a scalable
 * mode PASID entry.
 */
static inline void
pasid_set_domain_id(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
}

/*
 * Get the domain ID value of a scalable mode PASID entry.
 */
static inline u16
pasid_get_domain_id(struct pasid_entry *pe)
{
	return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
}

/*
 * Set up the SLPTPTR (Second Level Page Table Pointer) field
 * (bits 12-63) of a scalable mode PASID entry.
 */
static inline void
pasid_set_slptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
}

/*
 * Set up the AW (Address Width) field (bits 2-4) of a scalable mode
 * PASID entry.
 */
static inline void
pasid_set_address_width(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
}

/*
 * Set up the PGTT (PASID Granular Translation Type) field (bits 6-8)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_translation_type(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
}

/*
 * Enable fault processing by clearing the FPD (Fault Processing
 * Disable) field (bit 1) of a scalable mode PASID entry.
 */
static inline void pasid_set_fault_enable(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 1, 0);
}

/*
 * Set up the SRE (Supervisor Request Enable) field (bit 128) of a
 * scalable mode PASID entry.
 */
static inline void pasid_set_sre(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 0, 1);
}

/*
 * Set up the P (Present) field (bit 0) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_present(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 0, 1);
}
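
/*
 * Note on bit numbering (a reading aid, not spec text): the comments
 * on the setters above and below use entry-wide bit positions, while
 * the code addresses the 512-bit entry as eight 64-bit words. Entry
 * bit N lives in val[N / 64] at word bit (N % 64). For example:
 *
 *	DID, bits 64-79			-> val[1], bits 0-15
 *	Page Walk Snoop, bit 87		-> val[1], bit 23
 *	SRE, bit 128			-> val[2], bit 0
 */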

/*
 * Set up the Page Walk Snoop bit (bit 87) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
{
	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
}

/*
 * Set up the First Level Page Table Pointer field (bits 140-191)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_flptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
}

/*
 * Set up the First Level Paging Mode field (bits 130-131) of a
 * scalable mode PASID entry.
 */
static inline void
pasid_set_flpm(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}

/*
 * Set up the EAFE (Extended Access Flag Enable) field (bit 135)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_eafe(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = get_domain_info(dev);
	if (!info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * PASID 0 indicates RID2PASID (a DMA request without PASID), so a
	 * devTLB flush without PASID is used for it. For a non-zero PASID
	 * under SVA usage, a device may do DMA with multiple PASIDs, and
	 * it is more efficient to flush only the devTLB entries specific
	 * to that PASID.
	 */
	if (pasid == PASID_RID2PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0,
				   64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0,
					 64 - VTD_PAGE_SHIFT);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did;

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return;

	did = pasid_get_domain_id(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		qi_flush_piotlb(iommu, did, pasid, 0, -1, 0);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}
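
/*
 * Why pasid_flush_caches() splits on caching mode (an explanatory
 * note, paraphrasing rather than quoting the VT-d spec): with CM set,
 * hardware (usually a virtual IOMMU) may cache not-present entries,
 * so programming a fresh entry must be followed by explicit
 * PASID-cache and PASID-based IOTLB invalidation. On real hardware
 * with CM clear, not-present entries are never cached, and a write
 * buffer flush is enough to make the newly written entry visible.
 */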

/*
 * Set up the scalable mode pasid table entry for first level only
 * translation type.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  u32 pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));
	if (flags & PASID_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err("No supervisor request support on %s\n",
			       iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

	if (flags & PASID_FLAG_FL5LP) {
		if (cap_5lp_support(iommu->cap)) {
			pasid_set_flpm(pte, 1);
		} else {
			pr_err("No 5-level paging support for first-level\n");
			pasid_clear_entry(pte);
			return -EINVAL;
		}
	}

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Translation Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Skip the top levels of the page tables for an IOMMU whose agaw is
 * smaller than the domain's. Unnecessary for PT mode.
 */
static inline int iommu_skip_agaw(struct dmar_domain *domain,
				  struct intel_iommu *iommu,
				  struct dma_pte **pgd)
{
	int agaw;

	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		*pgd = phys_to_virt(dma_pte_addr(*pgd));
		if (!dma_pte_present(*pgd))
			return -EINVAL;
	}

	return agaw;
}

/*
 * Set up the scalable mode pasid entry for second level only
 * translation type.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err(dev, "Invalid domain page table\n");
		return -EINVAL;
	}

	pgd_val = virt_to_phys(pgd);
	did = domain->iommu_did[iommu->seq_id];

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * Since it is a second level only translation setup, we should
	 * set the SRE bit as well (addresses are expected to be GPAs).
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
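
/*
 * For reference, the PGTT encodings used above and below (mirroring
 * the PASID_ENTRY_PGTT_* definitions in this kernel's pasid.h; listed
 * here only as a reading aid):
 *
 *	1 - first level only	(PASID_ENTRY_PGTT_FL_ONLY)
 *	2 - second level only	(PASID_ENTRY_PGTT_SL_ONLY)
 *	3 - nested		(PASID_ENTRY_PGTT_NESTED)
 *	4 - pass-through	(PASID_ENTRY_PGTT_PT)
 */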

/*
 * Set up the scalable mode pasid entry for pass-through translation
 * type.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * We should set the SRE bit as well since the addresses are
	 * expected to be GPAs.
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

static int
intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
			    struct iommu_gpasid_bind_data_vtd *pasid_data)
{
	/*
	 * Not all guest PASID table entry fields are passed down during
	 * bind; here we only set up the ones that depend on guest settings.
	 * Execution related bits such as NXE and SMEP are not supported.
	 * Other fields, such as snoop related ones, are set based on host
	 * needs regardless of guest settings.
	 */
	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err_ratelimited("No supervisor request support on %s\n",
					   iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) {
		if (!ecap_eafs(iommu->ecap)) {
			pr_err_ratelimited("No extended access flag support on %s\n",
					   iommu->name);
			return -EINVAL;
		}
		pasid_set_eafe(pte);
	}

	/*
	 * Memory type is only applicable to devices inside the processor
	 * coherent domain. MTS support will be added once coherent devices
	 * are available.
	 */
	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) {
		pr_warn_ratelimited("No memory type support on %s\n",
				    iommu->name);
		return -EINVAL;
	}

	return 0;
}
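
/*
 * Address translation in nested mode, sketched for orientation (an
 * illustration of the terminology in the kernel-doc below, not spec
 * text):
 *
 *	guest virtual (GVA)
 *	      |  first level tables, guest-owned (gpgd points here, in GPA)
 *	      v
 *	guest physical (GPA)
 *	      |  second level tables, host-owned (domain->pgd)
 *	      v
 *	host physical (HPA)
 */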

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * This could be used for guest shared virtual address. In this case, the
 * first level page tables are used for GVA-GPA translation in the guest,
 * and the second level page tables are used for GPA-HPA translation.
 *
 * @iommu:      IOMMU which the device belongs to
 * @dev:        Device to be set up for translation
 * @gpgd:       FLPTPTR: First Level Page translation pointer in GPA
 * @pasid:      PASID to be programmed in the device PASID table
 * @pasid_data: Additional PASID info from the guest bind request
 * @domain:     Domain info for setting up second level page tables
 * @addr_width: Address width of the first level (guest)
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     pgd_t *gpgd, u32 pasid,
			     struct iommu_gpasid_bind_data_vtd *pasid_data,
			     struct dmar_domain *domain, int addr_width)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	int ret = 0;
	u64 pgd_val;
	int agaw;
	u16 did;

	if (!ecap_nest(iommu->ecap)) {
		pr_err_ratelimited("IOMMU: %s: No nested translation support\n",
				   iommu->name);
		return -EINVAL;
	}

	if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) {
		pr_err_ratelimited("Domain is not in nesting mode, %x\n",
				   domain->flags);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	/*
	 * The caller must ensure the PASID entry is not in use, i.e. must
	 * not bind the same PASID to the same device twice.
	 */
	if (pasid_pte_is_present(pte))
		return -EBUSY;

	pasid_clear_entry(pte);

	/*
	 * Sanity checking is performed by the caller to make sure the
	 * address widths match in two dimensions:
	 * 1. CPU vs. IOMMU
	 * 2. Guest vs. Host.
	 */
	switch (addr_width) {
#ifdef CONFIG_X86
	case ADDR_WIDTH_5LEVEL:
		if (!cpu_feature_enabled(X86_FEATURE_LA57) ||
		    !cap_5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}

		pasid_set_flpm(pte, 1);
		break;
#endif
	case ADDR_WIDTH_4LEVEL:
		pasid_set_flpm(pte, 0);
		break;
	default:
		dev_err_ratelimited(dev, "Invalid guest address width %d\n",
				    addr_width);
		return -EINVAL;
	}

	/* First level PGD is in GPA, must be supported by the second level */
	if ((uintptr_t)gpgd > domain->max_addr) {
		dev_err_ratelimited(dev,
				    "Guest PGD %lx not supported, max %llx\n",
				    (uintptr_t)gpgd, domain->max_addr);
		return -EINVAL;
	}
	pasid_set_flptr(pte, (uintptr_t)gpgd);

	ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data);
	if (ret)
		return ret;

	/* Setup the second level based on the given domain */
	pgd = domain->pgd;

	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err_ratelimited(dev, "Invalid domain page table\n");
		return -EINVAL;
	}
	pgd_val = virt_to_phys(pgd);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_fault_enable(pte);

	did = domain->iommu_did[iommu->seq_id];
	pasid_set_domain_id(pte, did);

	pasid_set_address_width(pte, agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return ret;
}
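
/*
 * Typical call shape for the nested setup above (a sketch of how the
 * guest PASID bind path invokes it in this kernel era, with @data a
 * struct iommu_gpasid_bind_data from user space; locking and the
 * surrounding bind bookkeeping are elided):
 *
 *	ret = intel_pasid_setup_nested(iommu, dev,
 *				       (pgd_t *)(uintptr_t)data->gpgd,
 *				       data->hpasid, &data->vendor.vtd,
 *				       dmar_domain, data->addr_width);
 *	if (ret)
 *		...; // undo the bind bookkeeping
 */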