// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "pasid.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;

int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid)
{
	unsigned long flags;
	u8 status_code;
	int ret = 0;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		*pasid = VCMD_VRSP_RESULT_PASID(res);
		break;
	case VCMD_VRSP_SC_NO_PASID_AVAIL:
		pr_info("IOMMU: %s: No PASID available\n", iommu->name);
		ret = -ENOSPC;
		break;
	default:
		ret = -ENODEV;
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}

	return ret;
}

void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid)
{
	unsigned long flags;
	u8 status_code;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG,
		    VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		break;
	case VCMD_VRSP_SC_INVALID_PASID:
		pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
		break;
	default:
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}
}
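
/*
 * Usage sketch (illustrative only, not called anywhere in this file):
 * on an IOMMU advertising virtual command support, the two helpers
 * above pair up as below; @pasid is valid only when the alloc returns 0.
 *
 *	unsigned int pasid;
 *
 *	if (!vcmd_alloc_pasid(iommu, &pasid)) {
 *		...use @pasid, e.g. for a guest PASID allocation...
 *		vcmd_free_pasid(iommu, pasid);
 *	}
 */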

/*
 * Per device pasid table management:
 */
static inline void
device_attach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = pasid_table;
	list_add(&info->table, &pasid_table->dev);
}

static inline void
device_detach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = NULL;
	list_del(&info->table);
}

struct pasid_table_opaque {
	struct pasid_table **pasid_table;
	int segment;
	int bus;
	int devfn;
};

static int search_pasid_table(struct device_domain_info *info, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	if (info->iommu->segment == data->segment &&
	    info->bus == data->bus &&
	    info->devfn == data->devfn &&
	    info->pasid_table) {
		*data->pasid_table = info->pasid_table;
		return 1;
	}

	return 0;
}

static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	data->segment = pci_domain_nr(pdev->bus);
	data->bus = PCI_BUS_NUM(alias);
	data->devfn = alias & 0xff;

	return for_each_device_domain(&search_pasid_table, data);
}

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_table_opaque data;
	struct page *pages;
	int max_pasid = 0;
	int ret, order;
	int size;

	might_sleep();
	info = get_domain_info(dev);
	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
		return -EINVAL;

	/* DMA alias device already has a pasid table, use it: */
	data.pasid_table = &pasid_table;
	ret = pci_for_each_dma_alias(to_pci_dev(dev),
				     &get_alias_pasid_table, &data);
	if (ret)
		goto attach_out;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;
	INIT_LIST_HEAD(&pasid_table->dev);

	if (info->pasid_supported)
		max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);

attach_out:
	device_attach_pasid_table(info, pasid_table);

	return 0;
}
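
/*
 * Sizing sketch (illustrative, assuming PASID_PDE_SHIFT == 6 and 4KiB
 * pages): each 8-byte directory entry covers 64 PASIDs, so the
 * directory needs max_pasid >> 3 bytes. A device reporting
 * pci_max_pasids() == 0x10000 thus needs 8KiB of directory (order 1),
 * and the stored limit is recomputed from that order:
 *
 *	max_pasid = 1 << (1 + PAGE_SHIFT + 3);	// == 0x10000
 */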

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = get_domain_info(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	device_detach_pasid_table(info, pasid_table);

	if (!list_empty(&pasid_table->dev))
		return;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = get_domain_info(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = get_domain_info(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid < 0 ||
		    pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = get_domain_info(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

	spin_lock(&pasid_lock);
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		entries = alloc_pgtable_page(info->iommu->node);
		if (!entries) {
			spin_unlock(&pasid_lock);
			return NULL;
		}

		WRITE_ONCE(dir[dir_index].val,
			   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
	}
	spin_unlock(&pasid_lock);

	return &entries[index];
}
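
/*
 * Lookup sketch (illustrative, assuming PASID_PDE_SHIFT == 6 and
 * PASID_PTE_MASK == 0x3f): PASID 0x1043 resolves as
 *
 *	dir_index = 0x1043 >> 6;	// 0x41
 *	index     = 0x1043 & 0x3f;	// 0x3
 *
 * i.e. slot 3 of the entry page referenced by directory slot 0x41,
 * with the entry page allocated on first use as above.
 */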

/*
 * Interfaces for PASID table entry manipulation:
 */
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], 0);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], PASID_PTE_FPD);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static void
intel_pasid_clear_entry(struct device *dev, int pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 old;

	old = READ_ONCE(*ptr);
	WRITE_ONCE(*ptr, (old & ~mask) | bits);
}

/*
 * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
 * PASID entry.
 */
static inline void
pasid_set_domain_id(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
}

/*
 * Get domain ID value of a scalable mode PASID entry.
 */
static inline u16
pasid_get_domain_id(struct pasid_entry *pe)
{
	return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
}

/*
 * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_slptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
}

/*
 * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
 * entry.
 */
static inline void
pasid_set_address_width(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
}

/*
 * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_translation_type(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
}

/*
 * Enable fault processing by clearing the FPD(Fault Processing
 * Disable) field (Bit 1) of a scalable mode PASID entry.
 */
static inline void pasid_set_fault_enable(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 1, 0);
}

/*
 * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
 * scalable mode PASID entry.
 */
static inline void pasid_set_sre(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 0, 1);
}

/*
 * Setup the P(Present) field (Bit 0) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_present(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 0, 1);
}

/*
 * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
{
	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
}

/*
 * Setup the First Level Page table Pointer field (Bit 140~191)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_flptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
}

/*
 * Setup the First Level Paging Mode field (Bit 130~131) of a
 * scalable mode PASID entry.
 */
static inline void
pasid_set_flpm(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}

/*
 * Setup the Extended Access Flag Enable (EAFE) field (Bit 135)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_eafe(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
}
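
/*
 * Bit-numbering sketch (illustrative): the 512-bit scalable mode PASID
 * entry is held as eight u64s, so spec-level bit N lives in val[N / 64]
 * at bit (N % 64). For example, DID bits 64~79 map to val[1] bits 0~15
 * (hence GENMASK_ULL(15, 0) in pasid_set_domain_id() above), and the
 * Page Walk Snoop bit 87 maps to val[1] bit 23.
 */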

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, int pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		   QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
		   QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, int pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = get_domain_info(dev);
	if (!info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/*
	 * PASID 0 indicates RID2PASID (DMA requests without PASID), so a
	 * devTLB flush without PASID should be used. For a non-zero PASID
	 * under SVA usage, the device could do DMA with multiple PASIDs;
	 * it is more efficient to flush the devTLB specific to the PASID.
	 */
	if (pasid == PASID_RID2PASID)
		qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
	else
		qi_flush_dev_iotlb_pasid(iommu, sid, pfsid, pasid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 int pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did;

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return;

	did = pasid_get_domain_id(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	iotlb_invalidation_with_pasid(iommu, did, pasid);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       int pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}
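
/*
 * Teardown sketch (illustrative only; locking and reference counting
 * omitted): an SVA unbind path would pair the teardown above with
 * releasing the PASID itself, roughly:
 *
 *	intel_pasid_tear_down_entry(iommu, dev, pasid, false);
 *	ioasid_free(pasid);
 */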

/*
 * Set up the scalable mode pasid table entry for first only
 * translation type.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  int pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));
	if (flags & PASID_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err("No supervisor request support on %s\n",
			       iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

	if (flags & PASID_FLAG_FL5LP) {
		if (cap_5lp_support(iommu->cap)) {
			pasid_set_flpm(pte, 1);
		} else {
			pr_err("No 5-level paging support for first-level\n");
			pasid_clear_entry(pte);
			return -EINVAL;
		}
	}

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Transfer Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Skip the top levels of the page tables for an IOMMU whose agaw is
 * smaller than the domain's. Unnecessary for PT mode.
 */
static inline int iommu_skip_agaw(struct dmar_domain *domain,
				  struct intel_iommu *iommu,
				  struct dma_pte **pgd)
{
	int agaw;

	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		*pgd = phys_to_virt(dma_pte_addr(*pgd));
		if (!dma_pte_present(*pgd))
			return -EINVAL;
	}

	return agaw;
}

/*
 * Set up the scalable mode pasid entry for second only translation type.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err(dev, "Invalid domain page table\n");
		return -EINVAL;
	}

	pgd_val = virt_to_phys(pgd);
	did = domain->iommu_did[iommu->seq_id];

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * Since it is a second level only translation setup, we should
	 * set the SRE bit as well (addresses are expected to be GPAs).
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * We should set the SRE bit as well since the addresses are
	 * expected to be GPAs.
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}
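
/*
 * Setup sketch (illustrative only): a native SVA bind programs a
 * first-level-only entry with the process page table, choosing the
 * paging mode from the CPU configuration, roughly:
 *
 *	int flags = 0;
 *
 *	if (cpu_feature_enabled(X86_FEATURE_LA57))
 *		flags |= PASID_FLAG_FL5LP;
 *	ret = intel_pasid_setup_first_level(iommu, dev, mm->pgd,
 *					    pasid, did, flags);
 */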

static int
intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
			    struct iommu_gpasid_bind_data_vtd *pasid_data)
{
	/*
	 * Not all guest PASID table entry fields are passed down during bind;
	 * here we only set up the ones that are dependent on guest settings.
	 * Execution related bits such as NXE, SMEP are not supported.
	 * Other fields, such as snoop related, are set based on host needs
	 * regardless of guest settings.
	 */
	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err_ratelimited("No supervisor request support on %s\n",
					   iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) {
		if (!ecap_eafs(iommu->ecap)) {
			pr_err_ratelimited("No extended access flag support on %s\n",
					   iommu->name);
			return -EINVAL;
		}
		pasid_set_eafe(pte);
	}

	/*
	 * Memory type is only applicable to devices inside processor coherent
	 * domain. Will add MTS support once coherent devices are available.
	 */
	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) {
		pr_warn_ratelimited("No memory type support on %s\n",
				    iommu->name);
		return -EINVAL;
	}

	return 0;
}

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * This could be used for guest shared virtual address. In this case, the
 * first level page tables are used for GVA-GPA translation in the guest,
 * while the second level page tables are used for GPA-HPA translation.
 *
 * @iommu:      IOMMU which the device belongs to
 * @dev:        Device to be set up for translation
 * @gpgd:       FLPTPTR: First Level Page translation pointer in GPA
 * @pasid:      PASID to be programmed in the device PASID table
 * @pasid_data: Additional PASID info from the guest bind request
 * @domain:     Domain info for setting up second level page tables
 * @addr_width: Address width of the first level (guest)
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     pgd_t *gpgd, int pasid,
			     struct iommu_gpasid_bind_data_vtd *pasid_data,
			     struct dmar_domain *domain, int addr_width)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	int ret = 0;
	u64 pgd_val;
	int agaw;
	u16 did;

	if (!ecap_nest(iommu->ecap)) {
		pr_err_ratelimited("IOMMU: %s: No nested translation support\n",
				   iommu->name);
		return -EINVAL;
	}

	if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) {
		pr_err_ratelimited("Domain is not in nesting mode, %x\n",
				   domain->flags);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	/*
	 * The caller must ensure the PASID entry is not in use, i.e. the
	 * same PASID must not be bound to the same device twice.
	 */
	if (pasid_pte_is_present(pte))
		return -EBUSY;

	pasid_clear_entry(pte);

	/*
	 * Sanity checking performed by the caller to make sure the
	 * address widths match in two dimensions:
	 * 1. CPU vs. IOMMU
	 * 2. Guest vs. Host.
	 */
	switch (addr_width) {
#ifdef CONFIG_X86
	case ADDR_WIDTH_5LEVEL:
		if (!cpu_feature_enabled(X86_FEATURE_LA57) ||
		    !cap_5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}

		pasid_set_flpm(pte, 1);
		break;
#endif
	case ADDR_WIDTH_4LEVEL:
		pasid_set_flpm(pte, 0);
		break;
	default:
		dev_err_ratelimited(dev, "Invalid guest address width %d\n",
				    addr_width);
		return -EINVAL;
	}

	/* First level PGD is in GPA, must be supported by the second level */
	if ((uintptr_t)gpgd > domain->max_addr) {
		dev_err_ratelimited(dev,
				    "Guest PGD %lx not supported, max %llx\n",
				    (uintptr_t)gpgd, domain->max_addr);
		return -EINVAL;
	}
	pasid_set_flptr(pte, (uintptr_t)gpgd);

	ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data);
	if (ret)
		return ret;

	/* Setup the second level based on the given domain */
	pgd = domain->pgd;

	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err_ratelimited(dev, "Invalid domain page table\n");
		return -EINVAL;
	}
	pgd_val = virt_to_phys(pgd);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_fault_enable(pte);

	did = domain->iommu_did[iommu->seq_id];
	pasid_set_domain_id(pte, did);

	pasid_set_address_width(pte, agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return ret;
}
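
/*
 * Nested setup sketch (illustrative only, simplified from a guest PASID
 * bind path; error handling omitted, and the shape of the uapi bind
 * data is an assumption here): the caller resolves the host @domain for
 * @dev and passes the guest CR3 and VT-d bind data straight through:
 *
 *	ret = intel_pasid_setup_nested(iommu, dev,
 *				       (pgd_t *)(uintptr_t)data->gpgd,
 *				       data->hpasid, &data->vendor.vtd,
 *				       dmar_domain, data->addr_width);
 */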