// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "pasid.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;

int vcmd_alloc_pasid(struct intel_iommu *iommu, u32 *pasid)
{
	unsigned long flags;
	u8 status_code;
	int ret = 0;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		*pasid = VCMD_VRSP_RESULT_PASID(res);
		break;
	case VCMD_VRSP_SC_NO_PASID_AVAIL:
		pr_info("IOMMU: %s: No PASID available\n", iommu->name);
		ret = -ENOSPC;
		break;
	default:
		ret = -ENODEV;
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}

	return ret;
}

void vcmd_free_pasid(struct intel_iommu *iommu, u32 pasid)
{
	unsigned long flags;
	u8 status_code;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG,
		    VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		break;
	case VCMD_VRSP_SC_INVALID_PASID:
		pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
		break;
	default:
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}
}

/*
 * Per device pasid table management:
 */
static inline void
device_attach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = pasid_table;
	list_add(&info->table, &pasid_table->dev);
}

static inline void
device_detach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = NULL;
	list_del(&info->table);
}

struct pasid_table_opaque {
	struct pasid_table	**pasid_table;
	int			segment;
	int			bus;
	int			devfn;
};

static int search_pasid_table(struct device_domain_info *info, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	if (info->iommu->segment == data->segment &&
	    info->bus == data->bus &&
	    info->devfn == data->devfn &&
	    info->pasid_table) {
		*data->pasid_table = info->pasid_table;
		return 1;
	}

	return 0;
}

static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	data->segment = pci_domain_nr(pdev->bus);
	data->bus = PCI_BUS_NUM(alias);
	data->devfn = alias & 0xff;

	return for_each_device_domain(&search_pasid_table, data);
}
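/*
 * Sizing note (illustrative, assuming PASID_PDE_SHIFT == 6, i.e. 8-byte
 * directory entries each covering 64 PASIDs): the PASID table is two
 * levels. A directory that supports max_pasid PASIDs needs
 * (max_pasid >> PASID_PDE_SHIFT) * 8 bytes, which is the
 * "max_pasid >> (PASID_PDE_SHIFT - 3)" computed below. Conversely, a
 * directory spanning 2^order pages holds 2^(order + PAGE_SHIFT - 3)
 * entries and hence covers 1 << (order + PAGE_SHIFT - 3 + PASID_PDE_SHIFT)
 * PASIDs, matching the "1 << (order + PAGE_SHIFT + 3)" stored in
 * pasid_table->max_pasid.
 */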
/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-threaded context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_table_opaque data;
	struct page *pages;
	u32 max_pasid = 0;
	int ret, order;
	int size;

	might_sleep();
	info = get_domain_info(dev);
	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
		return -EINVAL;

	/* DMA alias device already has a pasid table, use it: */
	data.pasid_table = &pasid_table;
	ret = pci_for_each_dma_alias(to_pci_dev(dev),
				     &get_alias_pasid_table, &data);
	if (ret)
		goto attach_out;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;
	INIT_LIST_HEAD(&pasid_table->dev);

	if (info->pasid_supported)
		max_pasid = min_t(u32, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);

attach_out:
	device_attach_pasid_table(info, pasid_table);

	return 0;
}

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = get_domain_info(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	device_detach_pasid_table(info, pasid_table);

	if (!list_empty(&pasid_table->dev))
		return;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = get_domain_info(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = get_domain_info(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}
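/*
 * Look up the PASID entry for @pasid on @dev, lazily allocating the
 * leaf PASID table under pasid_lock if the directory entry is not yet
 * present. The PDE is published with WRITE_ONCE() so that a racing
 * lookup (the reader side presumably pairing with a READ_ONCE() in
 * get_pasid_table_from_pde()) never observes a torn pointer/present-bit
 * update.
 */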
struct pasid_entry *intel_pasid_get_entry(struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = get_domain_info(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

	spin_lock(&pasid_lock);
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		entries = alloc_pgtable_page(info->iommu->node);
		if (!entries) {
			spin_unlock(&pasid_lock);
			return NULL;
		}

		WRITE_ONCE(dir[dir_index].val,
			   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
	}
	spin_unlock(&pasid_lock);

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], 0);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], PASID_PTE_FPD);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static void
intel_pasid_clear_entry(struct device *dev, u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 old;

	old = READ_ONCE(*ptr);
	WRITE_ONCE(*ptr, (old & ~mask) | bits);
}

/*
 * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
 * PASID entry.
 */
static inline void
pasid_set_domain_id(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
}

/*
 * Get domain ID value of a scalable mode PASID entry.
 */
static inline u16
pasid_get_domain_id(struct pasid_entry *pe)
{
	return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
}

/*
 * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_slptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
}

/*
 * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
 * entry.
 */
static inline void
pasid_set_address_width(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
}

/*
 * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_translation_type(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
}

/*
 * Enable fault processing by clearing the FPD(Fault Processing
 * Disable) field (Bit 1) of a scalable mode PASID entry.
 */
static inline void pasid_set_fault_enable(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 1, 0);
}

/*
 * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
 * scalable mode PASID entry.
 */
static inline void pasid_set_sre(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 0, 1);
}

/*
 * Setup the P(Present) field (Bit 0) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_present(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 0, 1);
}
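/*
 * Bit numbering note (illustrative): a scalable mode PASID entry is
 * 512 bits wide, stored here as eight u64s, so spec bit N lives at
 * bit (N % 64) of pe->val[N / 64]. For example, SRE at bit 128 is
 * bit 0 of val[2] above, and the Page Walk Snoop bit at bit 87 below
 * is bit 23 of val[1] (87 - 64 == 23).
 */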
/*
 * Setup the Page Walk Snoop bit (Bit 87) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
{
	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
}

/*
 * Setup the First Level Page table Pointer field (Bit 140~191)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_flptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
}

/*
 * Setup the First Level Paging Mode field (Bit 130~131) of a
 * scalable mode PASID entry.
 */
static inline void
pasid_set_flpm(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}

/*
 * Setup the Extended Access Flag Enable (EAFE) field (Bit 135)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_eafe(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
		QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, u32 pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = get_domain_info(dev);
	if (!info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * PASID 0 is RID2PASID, i.e. a DMA request without PASID, so a
	 * devTLB flush without PASID is used for it. For a non-zero
	 * PASID under SVA usage, the device may do DMA with multiple
	 * PASIDs, and it is more efficient to flush only the devTLB
	 * entries specific to that PASID.
	 */
	if (pasid == PASID_RID2PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0,
				   64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0,
					 64 - VTD_PAGE_SHIFT);
}
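/*
 * Teardown ordering note (describing the sequence below): the entry is
 * cleared first, then the PASID cache, the IOTLB and finally the
 * device TLB are invalidated, so a stale devTLB entry cannot be
 * re-fetched through an intermediate cache that has not been flushed
 * yet.
 */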
void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 u32 pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did;

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return;

	did = pasid_get_domain_id(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	iotlb_invalidation_with_pasid(iommu, did, pasid);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       u32 pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * Set up the scalable mode pasid table entry for first level only
 * translation type.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  u32 pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));
	if (flags & PASID_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err("No supervisor request support on %s\n",
			       iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

	if (flags & PASID_FLAG_FL5LP) {
		if (cap_5lp_support(iommu->cap)) {
			pasid_set_flpm(pte, 1);
		} else {
			pr_err("No 5-level paging support for first-level\n");
			pasid_clear_entry(pte);
			return -EINVAL;
		}
	}

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Translation Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Skip top levels of page tables for an iommu which has less agaw
 * than default. Unnecessary for PT mode.
 */
static inline int iommu_skip_agaw(struct dmar_domain *domain,
				  struct intel_iommu *iommu,
				  struct dma_pte **pgd)
{
	int agaw;

	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		*pgd = phys_to_virt(dma_pte_addr(*pgd));
		if (!dma_pte_present(*pgd))
			return -EINVAL;
	}

	return agaw;
}
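/*
 * Illustrative example (assuming the usual VT-d AGAW encoding where
 * agaw values 1/2/3 correspond to 3/4/5-level tables): if the domain
 * was built with a 5-level table (agaw 3) but this iommu only supports
 * 4-level (agaw 2), iommu_skip_agaw() walks one level down from
 * domain->pgd and hands back the 4-level sub-table that the hardware
 * can consume, along with the adjusted agaw.
 */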
/*
 * Set up the scalable mode pasid entry for second level only
 * translation type.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err(dev, "Invalid domain page table\n");
		return -EINVAL;
	}

	pgd_val = virt_to_phys(pgd);
	did = domain->iommu_did[iommu->seq_id];

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * Since it is a second level only translation setup, we should
	 * set SRE bit as well (addresses are expected to be GPAs).
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, u32 pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * We should set SRE bit as well since the addresses are expected
	 * to be GPAs.
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

static int
intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
			    struct iommu_gpasid_bind_data_vtd *pasid_data)
{
	/*
	 * Not all guest PASID table entry fields are passed down during bind,
	 * here we only set up the ones that are dependent on guest settings.
	 * Execution related bits such as NXE, SMEP are not supported.
	 * Other fields, such as snoop related, are set based on host needs
	 * regardless of guest settings.
	 */
	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err_ratelimited("No supervisor request support on %s\n",
					   iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) {
		if (!ecap_eafs(iommu->ecap)) {
			pr_err_ratelimited("No extended access flag support on %s\n",
					   iommu->name);
			return -EINVAL;
		}
		pasid_set_eafe(pte);
	}

	/*
	 * Memory type is only applicable to devices inside the processor
	 * coherent domain. MTS support will be added once coherent devices
	 * are available.
	 */
	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) {
		pr_warn_ratelimited("No memory type support on %s\n",
				    iommu->name);
		return -EINVAL;
	}

	return 0;
}
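/*
 * Note (illustrative): in nested mode the first level page table
 * pointer programmed below is a guest physical address. Hardware walks
 * the guest's first level tables for GVA->GPA, then translates every
 * GPA (including the first level table pointers themselves) through
 * the host's second level tables to reach the HPA.
 */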
/**
 * intel_pasid_setup_nested() - Set up a PASID entry for nested translation.
 * This could be used for guest shared virtual address. In this case, the
 * first level page tables are used for GVA-GPA translation in the guest,
 * and the second level page tables are used for GPA-HPA translation.
 *
 * @iommu:      IOMMU which the device belongs to
 * @dev:        Device to be set up for translation
 * @gpgd:       FLPTPTR: First Level Page translation pointer in GPA
 * @pasid:      PASID to be programmed in the device PASID table
 * @pasid_data: Additional PASID info from the guest bind request
 * @domain:     Domain info for setting up second level page tables
 * @addr_width: Address width of the first level (guest)
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     pgd_t *gpgd, u32 pasid,
			     struct iommu_gpasid_bind_data_vtd *pasid_data,
			     struct dmar_domain *domain, int addr_width)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	int ret = 0;
	u64 pgd_val;
	int agaw;
	u16 did;

	if (!ecap_nest(iommu->ecap)) {
		pr_err_ratelimited("IOMMU: %s: No nested translation support\n",
				   iommu->name);
		return -EINVAL;
	}

	if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) {
		pr_err_ratelimited("Domain is not in nesting mode, %x\n",
				   domain->flags);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	/*
	 * Caller must ensure the PASID entry is not in use, i.e. it must
	 * not bind the same PASID to the same device twice.
	 */
	if (pasid_pte_is_present(pte))
		return -EBUSY;

	pasid_clear_entry(pte);

	/*
	 * Sanity checking is performed by the caller to make sure the
	 * address width matches in two dimensions:
	 * 1. CPU vs. IOMMU
	 * 2. Guest vs. Host.
	 */
	switch (addr_width) {
#ifdef CONFIG_X86
	case ADDR_WIDTH_5LEVEL:
		if (!cpu_feature_enabled(X86_FEATURE_LA57) ||
		    !cap_5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}

		pasid_set_flpm(pte, 1);
		break;
#endif
	case ADDR_WIDTH_4LEVEL:
		pasid_set_flpm(pte, 0);
		break;
	default:
		dev_err_ratelimited(dev, "Invalid guest address width %d\n",
				    addr_width);
		return -EINVAL;
	}

	/* First level PGD is in GPA, must be supported by the second level */
	if ((uintptr_t)gpgd > domain->max_addr) {
		dev_err_ratelimited(dev,
				    "Guest PGD %lx not supported, max %llx\n",
				    (uintptr_t)gpgd, domain->max_addr);
		return -EINVAL;
	}
	pasid_set_flptr(pte, (uintptr_t)gpgd);

	ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data);
	if (ret)
		return ret;

	/* Setup the second level based on the given domain */
	pgd = domain->pgd;

	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err_ratelimited(dev, "Invalid domain page table\n");
		return -EINVAL;
	}
	pgd_val = virt_to_phys(pgd);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_fault_enable(pte);

	did = domain->iommu_did[iommu->seq_id];
	pasid_set_domain_id(pte, did);

	pasid_set_address_width(pte, agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return ret;
}