// SPDX-License-Identifier: GPL-2.0
/*
 * intel-pasid.c - PASID idr, table and entry manipulation
 *
 * Copyright (C) 2018 Intel Corporation
 *
 * Author: Lu Baolu <baolu.lu@linux.intel.com>
 */

#define pr_fmt(fmt)	"DMAR: " fmt

#include <linux/bitops.h>
#include <linux/cpufeature.h>
#include <linux/dmar.h>
#include <linux/intel-iommu.h>
#include <linux/iommu.h>
#include <linux/memory.h>
#include <linux/pci.h>
#include <linux/pci-ats.h>
#include <linux/spinlock.h>

#include "intel-pasid.h"

/*
 * Intel IOMMU system wide PASID name space:
 */
static DEFINE_SPINLOCK(pasid_lock);
u32 intel_pasid_max_id = PASID_MAX;

/* Allocate a PASID through the IOMMU's virtual command interface. */
int vcmd_alloc_pasid(struct intel_iommu *iommu, unsigned int *pasid)
{
	unsigned long flags;
	u8 status_code;
	int ret = 0;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG, VCMD_CMD_ALLOC);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		*pasid = VCMD_VRSP_RESULT_PASID(res);
		break;
	case VCMD_VRSP_SC_NO_PASID_AVAIL:
		pr_info("IOMMU: %s: No PASID available\n", iommu->name);
		ret = -ENOSPC;
		break;
	default:
		ret = -ENODEV;
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}

	return ret;
}

/* Return a PASID previously allocated through the virtual command interface. */
void vcmd_free_pasid(struct intel_iommu *iommu, unsigned int pasid)
{
	unsigned long flags;
	u8 status_code;
	u64 res;

	raw_spin_lock_irqsave(&iommu->register_lock, flags);
	dmar_writeq(iommu->reg + DMAR_VCMD_REG,
		    VCMD_CMD_OPERAND(pasid) | VCMD_CMD_FREE);
	IOMMU_WAIT_OP(iommu, DMAR_VCRSP_REG, dmar_readq,
		      !(res & VCMD_VRSP_IP), res);
	raw_spin_unlock_irqrestore(&iommu->register_lock, flags);

	status_code = VCMD_VRSP_SC(res);
	switch (status_code) {
	case VCMD_VRSP_SC_SUCCESS:
		break;
	case VCMD_VRSP_SC_INVALID_PASID:
		pr_info("IOMMU: %s: Invalid PASID\n", iommu->name);
		break;
	default:
		pr_warn("IOMMU: %s: Unexpected error code %d\n",
			iommu->name, status_code);
	}
}

/*
 * Per device pasid table management:
 */
static inline void
device_attach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = pasid_table;
	list_add(&info->table, &pasid_table->dev);
}

static inline void
device_detach_pasid_table(struct device_domain_info *info,
			  struct pasid_table *pasid_table)
{
	info->pasid_table = NULL;
	list_del(&info->table);
}

struct pasid_table_opaque {
	struct pasid_table	**pasid_table;
	int			segment;
	int			bus;
	int			devfn;
};

static int search_pasid_table(struct device_domain_info *info, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	if (info->iommu->segment == data->segment &&
	    info->bus == data->bus &&
	    info->devfn == data->devfn &&
	    info->pasid_table) {
		*data->pasid_table = info->pasid_table;
		return 1;
	}

	return 0;
}

static int get_alias_pasid_table(struct pci_dev *pdev, u16 alias, void *opaque)
{
	struct pasid_table_opaque *data = opaque;

	data->segment = pci_domain_nr(pdev->bus);
	data->bus = PCI_BUS_NUM(alias);
	data->devfn = alias & 0xff;

	return for_each_device_domain(&search_pasid_table, data);
}
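
/*
 * Layout note (illustrative, based on the definitions in intel-pasid.h):
 * a scalable mode PASID table is two level. The directory holds 8-byte
 * entries, each pointing to a page of 64-byte PASID entries, so with
 * PASID_PDE_SHIFT == 6 one directory entry covers 64 PASIDs and the
 * directory size in bytes is max_pasid >> (PASID_PDE_SHIFT - 3).
 * Lookup math for a PASID p, assuming PASID_PTE_MASK == 0x3F:
 *
 *	dir_index = p >> PASID_PDE_SHIFT;	// which leaf table
 *	index     = p & PASID_PTE_MASK;		// entry within it
 */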

/*
 * Allocate a pasid table for @dev. It should be called in a
 * single-thread context.
 */
int intel_pasid_alloc_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_table_opaque data;
	struct page *pages;
	int max_pasid = 0;
	int ret, order;
	int size;

	might_sleep();
	info = get_domain_info(dev);
	if (WARN_ON(!info || !dev_is_pci(dev) || info->pasid_table))
		return -EINVAL;

	/* DMA alias device already has a pasid table, use it: */
	data.pasid_table = &pasid_table;
	ret = pci_for_each_dma_alias(to_pci_dev(dev),
				     &get_alias_pasid_table, &data);
	if (ret)
		goto attach_out;

	pasid_table = kzalloc(sizeof(*pasid_table), GFP_KERNEL);
	if (!pasid_table)
		return -ENOMEM;
	INIT_LIST_HEAD(&pasid_table->dev);

	if (info->pasid_supported)
		max_pasid = min_t(int, pci_max_pasids(to_pci_dev(dev)),
				  intel_pasid_max_id);

	size = max_pasid >> (PASID_PDE_SHIFT - 3);
	order = size ? get_order(size) : 0;
	pages = alloc_pages_node(info->iommu->node,
				 GFP_KERNEL | __GFP_ZERO, order);
	if (!pages) {
		kfree(pasid_table);
		return -ENOMEM;
	}

	pasid_table->table = page_address(pages);
	pasid_table->order = order;
	pasid_table->max_pasid = 1 << (order + PAGE_SHIFT + 3);

attach_out:
	device_attach_pasid_table(info, pasid_table);

	return 0;
}

void intel_pasid_free_table(struct device *dev)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *table;
	int i, max_pde;

	info = get_domain_info(dev);
	if (!info || !dev_is_pci(dev) || !info->pasid_table)
		return;

	pasid_table = info->pasid_table;
	device_detach_pasid_table(info, pasid_table);

	if (!list_empty(&pasid_table->dev))
		return;

	/* Free scalable mode PASID directory tables: */
	dir = pasid_table->table;
	max_pde = pasid_table->max_pasid >> PASID_PDE_SHIFT;
	for (i = 0; i < max_pde; i++) {
		table = get_pasid_table_from_pde(&dir[i]);
		free_pgtable_page(table);
	}

	free_pages((unsigned long)pasid_table->table, pasid_table->order);
	kfree(pasid_table);
}

struct pasid_table *intel_pasid_get_table(struct device *dev)
{
	struct device_domain_info *info;

	info = get_domain_info(dev);
	if (!info)
		return NULL;

	return info->pasid_table;
}

int intel_pasid_get_dev_max_id(struct device *dev)
{
	struct device_domain_info *info;

	info = get_domain_info(dev);
	if (!info || !info->pasid_table)
		return 0;

	return info->pasid_table->max_pasid;
}

struct pasid_entry *intel_pasid_get_entry(struct device *dev, int pasid)
{
	struct device_domain_info *info;
	struct pasid_table *pasid_table;
	struct pasid_dir_entry *dir;
	struct pasid_entry *entries;
	int dir_index, index;

	pasid_table = intel_pasid_get_table(dev);
	if (WARN_ON(!pasid_table || pasid < 0 ||
		    pasid >= intel_pasid_get_dev_max_id(dev)))
		return NULL;

	dir = pasid_table->table;
	info = get_domain_info(dev);
	dir_index = pasid >> PASID_PDE_SHIFT;
	index = pasid & PASID_PTE_MASK;

	spin_lock(&pasid_lock);
	entries = get_pasid_table_from_pde(&dir[dir_index]);
	if (!entries) {
		entries = alloc_pgtable_page(info->iommu->node);
		if (!entries) {
			spin_unlock(&pasid_lock);
			return NULL;
		}
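
		/*
		 * Publish the new leaf table with a single 64-bit store:
		 * the pointer and the present bit land atomically, so a
		 * reader of this directory entry sees either a clear
		 * entry or a fully formed one, never a present bit with
		 * a half-written pointer.
		 */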
		WRITE_ONCE(dir[dir_index].val,
			   (u64)virt_to_phys(entries) | PASID_PTE_PRESENT);
	}
	spin_unlock(&pasid_lock);

	return &entries[index];
}

/*
 * Interfaces for PASID table entry manipulation:
 */
static inline void pasid_clear_entry(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], 0);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static inline void pasid_clear_entry_with_fpd(struct pasid_entry *pe)
{
	WRITE_ONCE(pe->val[0], PASID_PTE_FPD);
	WRITE_ONCE(pe->val[1], 0);
	WRITE_ONCE(pe->val[2], 0);
	WRITE_ONCE(pe->val[3], 0);
	WRITE_ONCE(pe->val[4], 0);
	WRITE_ONCE(pe->val[5], 0);
	WRITE_ONCE(pe->val[6], 0);
	WRITE_ONCE(pe->val[7], 0);
}

static void
intel_pasid_clear_entry(struct device *dev, int pasid, bool fault_ignore)
{
	struct pasid_entry *pe;

	pe = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pe))
		return;

	if (fault_ignore && pasid_pte_is_present(pe))
		pasid_clear_entry_with_fpd(pe);
	else
		pasid_clear_entry(pe);
}

static inline void pasid_set_bits(u64 *ptr, u64 mask, u64 bits)
{
	u64 old;

	old = READ_ONCE(*ptr);
	WRITE_ONCE(*ptr, (old & ~mask) | bits);
}

/*
 * Setup the DID(Domain Identifier) field (Bit 64~79) of scalable mode
 * PASID entry.
 */
static inline void
pasid_set_domain_id(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[1], GENMASK_ULL(15, 0), value);
}

/*
 * Get domain ID value of a scalable mode PASID entry.
 */
static inline u16
pasid_get_domain_id(struct pasid_entry *pe)
{
	return (u16)(READ_ONCE(pe->val[1]) & GENMASK_ULL(15, 0));
}

/*
 * Setup the SLPTPTR(Second Level Page Table Pointer) field (Bit 12~63)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_slptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], VTD_PAGE_MASK, value);
}

/*
 * Setup the AW(Address Width) field (Bit 2~4) of a scalable mode PASID
 * entry.
 */
static inline void
pasid_set_address_width(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(4, 2), value << 2);
}

/*
 * Setup the PGTT(PASID Granular Translation Type) field (Bit 6~8)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_translation_type(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[0], GENMASK_ULL(8, 6), value << 6);
}

/*
 * Enable fault processing by clearing the FPD(Fault Processing
 * Disable) field (Bit 1) of a scalable mode PASID entry.
 */
static inline void pasid_set_fault_enable(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 1, 0);
}

/*
 * Setup the SRE(Supervisor Request Enable) field (Bit 128) of a
 * scalable mode PASID entry.
 */
static inline void pasid_set_sre(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 0, 1);
}
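
/*
 * Bit numbering note: the field comments on these helpers count bits
 * across the whole 512-bit PASID entry, so bit N lives in val[N / 64]
 * at position N % 64. SRE at bit 128, for example, is bit 0 of val[2].
 */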

/*
 * Setup the P(Present) field (Bit 0) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_present(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[0], 1 << 0, 1);
}

/*
 * Setup Page Walk Snoop bit (Bit 87) of a scalable mode PASID
 * entry.
 */
static inline void pasid_set_page_snoop(struct pasid_entry *pe, bool value)
{
	pasid_set_bits(&pe->val[1], 1 << 23, value << 23);
}

/*
 * Setup the First Level Page table Pointer field (Bit 140~191)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_flptr(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], VTD_PAGE_MASK, value);
}

/*
 * Setup the First Level Paging Mode field (Bit 130~131) of a
 * scalable mode PASID entry.
 */
static inline void
pasid_set_flpm(struct pasid_entry *pe, u64 value)
{
	pasid_set_bits(&pe->val[2], GENMASK_ULL(3, 2), value << 2);
}

/*
 * Setup the Extended Access Flag Enable (EAFE) field (Bit 135)
 * of a scalable mode PASID entry.
 */
static inline void
pasid_set_eafe(struct pasid_entry *pe)
{
	pasid_set_bits(&pe->val[2], 1 << 7, 1 << 7);
}

static void
pasid_cache_invalidation_with_pasid(struct intel_iommu *iommu,
				    u16 did, int pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_PC_DID(did) | QI_PC_GRAN(QI_PC_PASID_SEL) |
		QI_PC_PASID(pasid) | QI_PC_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
iotlb_invalidation_with_pasid(struct intel_iommu *iommu, u16 did, u32 pasid)
{
	struct qi_desc desc;

	desc.qw0 = QI_EIOTLB_PASID(pasid) | QI_EIOTLB_DID(did) |
			QI_EIOTLB_GRAN(QI_GRAN_NONG_PASID) | QI_EIOTLB_TYPE;
	desc.qw1 = 0;
	desc.qw2 = 0;
	desc.qw3 = 0;

	qi_submit_sync(iommu, &desc, 1, 0);
}

static void
devtlb_invalidation_with_pasid(struct intel_iommu *iommu,
			       struct device *dev, int pasid)
{
	struct device_domain_info *info;
	u16 sid, qdep, pfsid;

	info = get_domain_info(dev);
	if (!info || !info->ats_enabled)
		return;

	sid = info->bus << 8 | info->devfn;
	qdep = info->ats_qdep;
	pfsid = info->pfsid;

	/* Address 0 with a 64-bit mask flushes the whole device TLB. */
	qi_flush_dev_iotlb(iommu, sid, pfsid, qdep, 0, 64 - VTD_PAGE_SHIFT);
}

void intel_pasid_tear_down_entry(struct intel_iommu *iommu, struct device *dev,
				 int pasid, bool fault_ignore)
{
	struct pasid_entry *pte;
	u16 did;

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return;

	did = pasid_get_domain_id(pte);
	intel_pasid_clear_entry(dev, pasid, fault_ignore);

	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));
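
	/*
	 * Invalidation order matters on teardown: PASID cache first,
	 * then IOTLB, then device TLB, so that no stale translation can
	 * be re-fetched through a cache that is flushed later (this
	 * follows the ordering the VT-d spec recommends for scalable
	 * mode entries).
	 */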
	pasid_cache_invalidation_with_pasid(iommu, did, pasid);
	iotlb_invalidation_with_pasid(iommu, did, pasid);

	/* Device IOTLB doesn't need to be flushed in caching mode. */
	if (!cap_caching_mode(iommu->cap))
		devtlb_invalidation_with_pasid(iommu, dev, pasid);
}

static void pasid_flush_caches(struct intel_iommu *iommu,
			       struct pasid_entry *pte,
			       int pasid, u16 did)
{
	if (!ecap_coherent(iommu->ecap))
		clflush_cache_range(pte, sizeof(*pte));

	/*
	 * The entry being flushed just went from non-present to present.
	 * Only in caching mode may hardware cache non-present entries,
	 * so only then is an explicit invalidation needed; otherwise
	 * flushing the write buffer is sufficient.
	 */
	if (cap_caching_mode(iommu->cap)) {
		pasid_cache_invalidation_with_pasid(iommu, did, pasid);
		iotlb_invalidation_with_pasid(iommu, did, pasid);
	} else {
		iommu_flush_write_buffer(iommu);
	}
}

/*
 * Set up the scalable mode pasid table entry for first level only
 * translation type.
 */
int intel_pasid_setup_first_level(struct intel_iommu *iommu,
				  struct device *dev, pgd_t *pgd,
				  int pasid, u16 did, int flags)
{
	struct pasid_entry *pte;

	if (!ecap_flts(iommu->ecap)) {
		pr_err("No first level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	pasid_clear_entry(pte);

	/* Setup the first level page table pointer: */
	pasid_set_flptr(pte, (u64)__pa(pgd));
	if (flags & PASID_FLAG_SUPERVISOR_MODE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err("No supervisor request support on %s\n",
			       iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

	if (flags & PASID_FLAG_FL5LP) {
		if (cap_5lp_support(iommu->cap)) {
			pasid_set_flpm(pte, 1);
		} else {
			pr_err("No 5-level paging support for first-level\n");
			pasid_clear_entry(pte);
			return -EINVAL;
		}
	}

	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/* Setup Present and PASID Granular Translation Type: */
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_FL_ONLY);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Skip top levels of page tables for an IOMMU which has less agaw
 * than the default. Unnecessary for PT mode.
 */
static inline int iommu_skip_agaw(struct dmar_domain *domain,
				  struct intel_iommu *iommu,
				  struct dma_pte **pgd)
{
	int agaw;

	for (agaw = domain->agaw; agaw > iommu->agaw; agaw--) {
		*pgd = phys_to_virt(dma_pte_addr(*pgd));
		if (!dma_pte_present(*pgd))
			return -EINVAL;
	}

	return agaw;
}
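
/*
 * Worked example (assuming the usual encoding where agaw 2 means
 * 4-level/48-bit and agaw 3 means 5-level/57-bit tables): a domain
 * built with 5-level tables used on an IOMMU that only walks 4 levels
 * has one top level skipped, so the PASID entry is programmed with the
 * 4-level subtree that the hardware can actually consume.
 */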

/*
 * Set up the scalable mode pasid entry for second level only
 * translation type.
 */
int intel_pasid_setup_second_level(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	u64 pgd_val;
	int agaw;
	u16 did;

	/*
	 * If hardware advertises no support for second level
	 * translation, return directly.
	 */
	if (!ecap_slts(iommu->ecap)) {
		pr_err("No second level translation support on %s\n",
		       iommu->name);
		return -EINVAL;
	}

	pgd = domain->pgd;
	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err(dev, "Invalid domain page table\n");
		return -EINVAL;
	}

	pgd_val = virt_to_phys(pgd);
	did = domain->iommu_did[iommu->seq_id];

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_address_width(pte, agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_SL_ONLY);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * Since it is a second level only translation setup, we should
	 * set SRE bit as well (addresses are expected to be GPAs).
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

/*
 * Set up the scalable mode pasid entry for passthrough translation type.
 */
int intel_pasid_setup_pass_through(struct intel_iommu *iommu,
				   struct dmar_domain *domain,
				   struct device *dev, int pasid)
{
	u16 did = FLPT_DEFAULT_DID;
	struct pasid_entry *pte;

	pte = intel_pasid_get_entry(dev, pasid);
	if (!pte) {
		dev_err(dev, "Failed to get pasid entry of PASID %d\n", pasid);
		return -ENODEV;
	}

	pasid_clear_entry(pte);
	pasid_set_domain_id(pte, did);
	pasid_set_address_width(pte, iommu->agaw);
	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_PT);
	pasid_set_fault_enable(pte);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	/*
	 * We should set SRE bit as well since the addresses are expected
	 * to be GPAs.
	 */
	pasid_set_sre(pte);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return 0;
}

static int
intel_pasid_setup_bind_data(struct intel_iommu *iommu, struct pasid_entry *pte,
			    struct iommu_gpasid_bind_data_vtd *pasid_data)
{
	/*
	 * Not all guest PASID table entry fields are passed down during bind,
	 * here we only set up the ones that are dependent on guest settings.
	 * Execution related bits such as NXE, SMEP are not supported.
	 * Other fields, such as snoop related, are set based on host needs
	 * regardless of guest settings.
	 */
	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_SRE) {
		if (!ecap_srs(iommu->ecap)) {
			pr_err_ratelimited("No supervisor request support on %s\n",
					   iommu->name);
			return -EINVAL;
		}
		pasid_set_sre(pte);
	}

	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_EAFE) {
		if (!ecap_eafs(iommu->ecap)) {
			pr_err_ratelimited("No extended access flag support on %s\n",
					   iommu->name);
			return -EINVAL;
		}
		pasid_set_eafe(pte);
	}

	/*
	 * Memory type is only applicable to devices inside processor coherent
	 * domain. Will add MTS support once coherent devices are available.
	 */
	if (pasid_data->flags & IOMMU_SVA_VTD_GPASID_MTS_MASK) {
		pr_warn_ratelimited("No memory type support on %s\n",
				    iommu->name);
		return -EINVAL;
	}

	return 0;
}
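
/*
 * Illustrative nested flow (not executed here): a DMA request tagged
 * with @pasid first walks the guest-owned first level tables rooted at
 * @gpgd (GVA->GPA); every address produced by that walk, including the
 * first level table pointers themselves, is then translated through the
 * host-owned second level tables of @domain (GPA->HPA).
 */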

/**
 * intel_pasid_setup_nested() - Set up PASID entry for nested translation.
 * This could be used for guest shared virtual address. In this case, the
 * first level page tables are used for GVA-GPA translation in the guest,
 * while the second level page tables are used for GPA-HPA translation.
 *
 * @iommu:      IOMMU which the device belongs to
 * @dev:        Device to be set up for translation
 * @gpgd:       FLPTPTR: First Level Page translation pointer in GPA
 * @pasid:      PASID to be programmed in the device PASID table
 * @pasid_data: Additional PASID info from the guest bind request
 * @domain:     Domain info for setting up second level page tables
 * @addr_width: Address width of the first level (guest)
 */
int intel_pasid_setup_nested(struct intel_iommu *iommu, struct device *dev,
			     pgd_t *gpgd, int pasid,
			     struct iommu_gpasid_bind_data_vtd *pasid_data,
			     struct dmar_domain *domain, int addr_width)
{
	struct pasid_entry *pte;
	struct dma_pte *pgd;
	int ret = 0;
	u64 pgd_val;
	int agaw;
	u16 did;

	if (!ecap_nest(iommu->ecap)) {
		pr_err_ratelimited("IOMMU: %s: No nested translation support\n",
				   iommu->name);
		return -EINVAL;
	}

	if (!(domain->flags & DOMAIN_FLAG_NESTING_MODE)) {
		pr_err_ratelimited("Domain is not in nesting mode, %x\n",
				   domain->flags);
		return -EINVAL;
	}

	pte = intel_pasid_get_entry(dev, pasid);
	if (WARN_ON(!pte))
		return -EINVAL;

	/*
	 * Caller must ensure PASID entry is not in use, i.e. the same
	 * PASID is not bound to the same device twice.
	 */
	if (pasid_pte_is_present(pte))
		return -EBUSY;

	pasid_clear_entry(pte);

	/*
	 * Sanity checks performed by the caller ensure that the address
	 * width matches in two dimensions:
	 * 1. CPU vs. IOMMU
	 * 2. Guest vs. Host.
	 */
	switch (addr_width) {
#ifdef CONFIG_X86
	case ADDR_WIDTH_5LEVEL:
		if (!cpu_feature_enabled(X86_FEATURE_LA57) ||
		    !cap_5lp_support(iommu->cap)) {
			dev_err_ratelimited(dev,
					    "5-level paging not supported\n");
			return -EINVAL;
		}

		pasid_set_flpm(pte, 1);
		break;
#endif
	case ADDR_WIDTH_4LEVEL:
		pasid_set_flpm(pte, 0);
		break;
	default:
		dev_err_ratelimited(dev, "Invalid guest address width %d\n",
				    addr_width);
		return -EINVAL;
	}

	/* First level PGD is in GPA, must be supported by the second level */
	if ((uintptr_t)gpgd > domain->max_addr) {
		dev_err_ratelimited(dev,
				    "Guest PGD %lx not supported, max %llx\n",
				    (uintptr_t)gpgd, domain->max_addr);
		return -EINVAL;
	}
	pasid_set_flptr(pte, (uintptr_t)gpgd);

	ret = intel_pasid_setup_bind_data(iommu, pte, pasid_data);
	if (ret)
		return ret;

	/* Setup the second level based on the given domain */
	pgd = domain->pgd;

	agaw = iommu_skip_agaw(domain, iommu, &pgd);
	if (agaw < 0) {
		dev_err_ratelimited(dev, "Invalid domain page table\n");
		return -EINVAL;
	}
	pgd_val = virt_to_phys(pgd);
	pasid_set_slptr(pte, pgd_val);
	pasid_set_fault_enable(pte);

	did = domain->iommu_did[iommu->seq_id];
	pasid_set_domain_id(pte, did);

	pasid_set_address_width(pte, agaw);
	pasid_set_page_snoop(pte, !!ecap_smpwc(iommu->ecap));

	pasid_set_translation_type(pte, PASID_ENTRY_PGTT_NESTED);
	pasid_set_present(pte);
	pasid_flush_caches(iommu, pte, pasid, did);

	return ret;
}