1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * CPU-agnostic AMD IO page table allocator. 4 * 5 * Copyright (C) 2020 Advanced Micro Devices, Inc. 6 * Author: Suravee Suthikulpanit <suravee.suthikulpanit@amd.com> 7 */ 8 9 #define pr_fmt(fmt) "AMD-Vi: " fmt 10 #define dev_fmt(fmt) pr_fmt(fmt) 11 12 #include <linux/atomic.h> 13 #include <linux/bitops.h> 14 #include <linux/io-pgtable.h> 15 #include <linux/kernel.h> 16 #include <linux/sizes.h> 17 #include <linux/slab.h> 18 #include <linux/types.h> 19 #include <linux/dma-mapping.h> 20 21 #include <asm/barrier.h> 22 23 #include "amd_iommu_types.h" 24 #include "amd_iommu.h" 25 26 static void v1_tlb_flush_all(void *cookie) 27 { 28 } 29 30 static void v1_tlb_flush_walk(unsigned long iova, size_t size, 31 size_t granule, void *cookie) 32 { 33 } 34 35 static void v1_tlb_add_page(struct iommu_iotlb_gather *gather, 36 unsigned long iova, size_t granule, 37 void *cookie) 38 { 39 } 40 41 static const struct iommu_flush_ops v1_flush_ops = { 42 .tlb_flush_all = v1_tlb_flush_all, 43 .tlb_flush_walk = v1_tlb_flush_walk, 44 .tlb_add_page = v1_tlb_add_page, 45 }; 46 47 /* 48 * Helper function to get the first pte of a large mapping 49 */ 50 static u64 *first_pte_l7(u64 *pte, unsigned long *page_size, 51 unsigned long *count) 52 { 53 unsigned long pte_mask, pg_size, cnt; 54 u64 *fpte; 55 56 pg_size = PTE_PAGE_SIZE(*pte); 57 cnt = PAGE_SIZE_PTE_COUNT(pg_size); 58 pte_mask = ~((cnt << 3) - 1); 59 fpte = (u64 *)(((unsigned long)pte) & pte_mask); 60 61 if (page_size) 62 *page_size = pg_size; 63 64 if (count) 65 *count = cnt; 66 67 return fpte; 68 } 69 70 /**************************************************************************** 71 * 72 * The functions below are used the create the page table mappings for 73 * unity mapped regions. 74 * 75 ****************************************************************************/ 76 77 static void free_pt_page(u64 *pt, struct list_head *freelist) 78 { 79 struct page *p = virt_to_page(pt); 80 81 list_add_tail(&p->lru, freelist); 82 } 83 84 static void free_pt_lvl(u64 *pt, struct list_head *freelist, int lvl) 85 { 86 u64 *p; 87 int i; 88 89 for (i = 0; i < 512; ++i) { 90 /* PTE present? */ 91 if (!IOMMU_PTE_PRESENT(pt[i])) 92 continue; 93 94 /* Large PTE? */ 95 if (PM_PTE_LEVEL(pt[i]) == 0 || 96 PM_PTE_LEVEL(pt[i]) == 7) 97 continue; 98 99 /* 100 * Free the next level. No need to look at l1 tables here since 101 * they can only contain leaf PTEs; just free them directly. 102 */ 103 p = IOMMU_PTE_PAGE(pt[i]); 104 if (lvl > 2) 105 free_pt_lvl(p, freelist, lvl - 1); 106 else 107 free_pt_page(p, freelist); 108 } 109 110 free_pt_page(pt, freelist); 111 } 112 113 static void free_sub_pt(u64 *root, int mode, struct list_head *freelist) 114 { 115 switch (mode) { 116 case PAGE_MODE_NONE: 117 case PAGE_MODE_7_LEVEL: 118 break; 119 case PAGE_MODE_1_LEVEL: 120 free_pt_page(root, freelist); 121 break; 122 case PAGE_MODE_2_LEVEL: 123 case PAGE_MODE_3_LEVEL: 124 case PAGE_MODE_4_LEVEL: 125 case PAGE_MODE_5_LEVEL: 126 case PAGE_MODE_6_LEVEL: 127 free_pt_lvl(root, freelist, mode); 128 break; 129 default: 130 BUG(); 131 } 132 } 133 134 void amd_iommu_domain_set_pgtable(struct protection_domain *domain, 135 u64 *root, int mode) 136 { 137 u64 pt_root; 138 139 /* lowest 3 bits encode pgtable mode */ 140 pt_root = mode & 7; 141 pt_root |= (u64)root; 142 143 amd_iommu_domain_set_pt_root(domain, pt_root); 144 } 145 146 /* 147 * This function is used to add another level to an IO page table. Adding 148 * another level increases the size of the address space by 9 bits to a size up 149 * to 64 bits. 150 */ 151 static bool increase_address_space(struct protection_domain *domain, 152 unsigned long address, 153 gfp_t gfp) 154 { 155 unsigned long flags; 156 bool ret = true; 157 u64 *pte; 158 159 pte = (void *)get_zeroed_page(gfp); 160 if (!pte) 161 return false; 162 163 spin_lock_irqsave(&domain->lock, flags); 164 165 if (address <= PM_LEVEL_SIZE(domain->iop.mode)) 166 goto out; 167 168 ret = false; 169 if (WARN_ON_ONCE(domain->iop.mode == PAGE_MODE_6_LEVEL)) 170 goto out; 171 172 *pte = PM_LEVEL_PDE(domain->iop.mode, iommu_virt_to_phys(domain->iop.root)); 173 174 domain->iop.root = pte; 175 domain->iop.mode += 1; 176 amd_iommu_update_and_flush_device_table(domain); 177 amd_iommu_domain_flush_complete(domain); 178 179 /* 180 * Device Table needs to be updated and flushed before the new root can 181 * be published. 182 */ 183 amd_iommu_domain_set_pgtable(domain, pte, domain->iop.mode); 184 185 pte = NULL; 186 ret = true; 187 188 out: 189 spin_unlock_irqrestore(&domain->lock, flags); 190 free_page((unsigned long)pte); 191 192 return ret; 193 } 194 195 static u64 *alloc_pte(struct protection_domain *domain, 196 unsigned long address, 197 unsigned long page_size, 198 u64 **pte_page, 199 gfp_t gfp, 200 bool *updated) 201 { 202 int level, end_lvl; 203 u64 *pte, *page; 204 205 BUG_ON(!is_power_of_2(page_size)); 206 207 while (address > PM_LEVEL_SIZE(domain->iop.mode)) { 208 /* 209 * Return an error if there is no memory to update the 210 * page-table. 211 */ 212 if (!increase_address_space(domain, address, gfp)) 213 return NULL; 214 } 215 216 217 level = domain->iop.mode - 1; 218 pte = &domain->iop.root[PM_LEVEL_INDEX(level, address)]; 219 address = PAGE_SIZE_ALIGN(address, page_size); 220 end_lvl = PAGE_SIZE_LEVEL(page_size); 221 222 while (level > end_lvl) { 223 u64 __pte, __npte; 224 int pte_level; 225 226 __pte = *pte; 227 pte_level = PM_PTE_LEVEL(__pte); 228 229 /* 230 * If we replace a series of large PTEs, we need 231 * to tear down all of them. 232 */ 233 if (IOMMU_PTE_PRESENT(__pte) && 234 pte_level == PAGE_MODE_7_LEVEL) { 235 unsigned long count, i; 236 u64 *lpte; 237 238 lpte = first_pte_l7(pte, NULL, &count); 239 240 /* 241 * Unmap the replicated PTEs that still match the 242 * original large mapping 243 */ 244 for (i = 0; i < count; ++i) 245 cmpxchg64(&lpte[i], __pte, 0ULL); 246 247 *updated = true; 248 continue; 249 } 250 251 if (!IOMMU_PTE_PRESENT(__pte) || 252 pte_level == PAGE_MODE_NONE) { 253 page = (u64 *)get_zeroed_page(gfp); 254 255 if (!page) 256 return NULL; 257 258 __npte = PM_LEVEL_PDE(level, iommu_virt_to_phys(page)); 259 260 /* pte could have been changed somewhere. */ 261 if (!try_cmpxchg64(pte, &__pte, __npte)) 262 free_page((unsigned long)page); 263 else if (IOMMU_PTE_PRESENT(__pte)) 264 *updated = true; 265 266 continue; 267 } 268 269 /* No level skipping support yet */ 270 if (pte_level != level) 271 return NULL; 272 273 level -= 1; 274 275 pte = IOMMU_PTE_PAGE(__pte); 276 277 if (pte_page && level == end_lvl) 278 *pte_page = pte; 279 280 pte = &pte[PM_LEVEL_INDEX(level, address)]; 281 } 282 283 return pte; 284 } 285 286 /* 287 * This function checks if there is a PTE for a given dma address. If 288 * there is one, it returns the pointer to it. 289 */ 290 static u64 *fetch_pte(struct amd_io_pgtable *pgtable, 291 unsigned long address, 292 unsigned long *page_size) 293 { 294 int level; 295 u64 *pte; 296 297 *page_size = 0; 298 299 if (address > PM_LEVEL_SIZE(pgtable->mode)) 300 return NULL; 301 302 level = pgtable->mode - 1; 303 pte = &pgtable->root[PM_LEVEL_INDEX(level, address)]; 304 *page_size = PTE_LEVEL_PAGE_SIZE(level); 305 306 while (level > 0) { 307 308 /* Not Present */ 309 if (!IOMMU_PTE_PRESENT(*pte)) 310 return NULL; 311 312 /* Large PTE */ 313 if (PM_PTE_LEVEL(*pte) == 7 || 314 PM_PTE_LEVEL(*pte) == 0) 315 break; 316 317 /* No level skipping support yet */ 318 if (PM_PTE_LEVEL(*pte) != level) 319 return NULL; 320 321 level -= 1; 322 323 /* Walk to the next level */ 324 pte = IOMMU_PTE_PAGE(*pte); 325 pte = &pte[PM_LEVEL_INDEX(level, address)]; 326 *page_size = PTE_LEVEL_PAGE_SIZE(level); 327 } 328 329 /* 330 * If we have a series of large PTEs, make 331 * sure to return a pointer to the first one. 332 */ 333 if (PM_PTE_LEVEL(*pte) == PAGE_MODE_7_LEVEL) 334 pte = first_pte_l7(pte, page_size, NULL); 335 336 return pte; 337 } 338 339 static void free_clear_pte(u64 *pte, u64 pteval, struct list_head *freelist) 340 { 341 u64 *pt; 342 int mode; 343 344 while (!try_cmpxchg64(pte, &pteval, 0)) 345 pr_warn("AMD-Vi: IOMMU pte changed since we read it\n"); 346 347 if (!IOMMU_PTE_PRESENT(pteval)) 348 return; 349 350 pt = IOMMU_PTE_PAGE(pteval); 351 mode = IOMMU_PTE_MODE(pteval); 352 353 free_sub_pt(pt, mode, freelist); 354 } 355 356 /* 357 * Generic mapping functions. It maps a physical address into a DMA 358 * address space. It allocates the page table pages if necessary. 359 * In the future it can be extended to a generic mapping function 360 * supporting all features of AMD IOMMU page tables like level skipping 361 * and full 64 bit address spaces. 362 */ 363 static int iommu_v1_map_page(struct io_pgtable_ops *ops, unsigned long iova, 364 phys_addr_t paddr, size_t size, int prot, gfp_t gfp) 365 { 366 struct protection_domain *dom = io_pgtable_ops_to_domain(ops); 367 LIST_HEAD(freelist); 368 bool updated = false; 369 u64 __pte, *pte; 370 int ret, i, count; 371 372 BUG_ON(!IS_ALIGNED(iova, size)); 373 BUG_ON(!IS_ALIGNED(paddr, size)); 374 375 ret = -EINVAL; 376 if (!(prot & IOMMU_PROT_MASK)) 377 goto out; 378 379 count = PAGE_SIZE_PTE_COUNT(size); 380 pte = alloc_pte(dom, iova, size, NULL, gfp, &updated); 381 382 ret = -ENOMEM; 383 if (!pte) 384 goto out; 385 386 for (i = 0; i < count; ++i) 387 free_clear_pte(&pte[i], pte[i], &freelist); 388 389 if (!list_empty(&freelist)) 390 updated = true; 391 392 if (count > 1) { 393 __pte = PAGE_SIZE_PTE(__sme_set(paddr), size); 394 __pte |= PM_LEVEL_ENC(7) | IOMMU_PTE_PR | IOMMU_PTE_FC; 395 } else 396 __pte = __sme_set(paddr) | IOMMU_PTE_PR | IOMMU_PTE_FC; 397 398 if (prot & IOMMU_PROT_IR) 399 __pte |= IOMMU_PTE_IR; 400 if (prot & IOMMU_PROT_IW) 401 __pte |= IOMMU_PTE_IW; 402 403 for (i = 0; i < count; ++i) 404 pte[i] = __pte; 405 406 ret = 0; 407 408 out: 409 if (updated) { 410 unsigned long flags; 411 412 spin_lock_irqsave(&dom->lock, flags); 413 /* 414 * Flush domain TLB(s) and wait for completion. Any Device-Table 415 * Updates and flushing already happened in 416 * increase_address_space(). 417 */ 418 amd_iommu_domain_flush_tlb_pde(dom); 419 amd_iommu_domain_flush_complete(dom); 420 spin_unlock_irqrestore(&dom->lock, flags); 421 } 422 423 /* Everything flushed out, free pages now */ 424 put_pages_list(&freelist); 425 426 return ret; 427 } 428 429 static unsigned long iommu_v1_unmap_page(struct io_pgtable_ops *ops, 430 unsigned long iova, 431 size_t size, 432 struct iommu_iotlb_gather *gather) 433 { 434 struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); 435 unsigned long long unmapped; 436 unsigned long unmap_size; 437 u64 *pte; 438 439 BUG_ON(!is_power_of_2(size)); 440 441 unmapped = 0; 442 443 while (unmapped < size) { 444 pte = fetch_pte(pgtable, iova, &unmap_size); 445 if (pte) { 446 int i, count; 447 448 count = PAGE_SIZE_PTE_COUNT(unmap_size); 449 for (i = 0; i < count; i++) 450 pte[i] = 0ULL; 451 } 452 453 iova = (iova & ~(unmap_size - 1)) + unmap_size; 454 unmapped += unmap_size; 455 } 456 457 BUG_ON(unmapped && !is_power_of_2(unmapped)); 458 459 return unmapped; 460 } 461 462 static phys_addr_t iommu_v1_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) 463 { 464 struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); 465 unsigned long offset_mask, pte_pgsize; 466 u64 *pte, __pte; 467 468 pte = fetch_pte(pgtable, iova, &pte_pgsize); 469 470 if (!pte || !IOMMU_PTE_PRESENT(*pte)) 471 return 0; 472 473 offset_mask = pte_pgsize - 1; 474 __pte = __sme_clr(*pte & PM_ADDR_MASK); 475 476 return (__pte & ~offset_mask) | (iova & offset_mask); 477 } 478 479 /* 480 * ---------------------------------------------------- 481 */ 482 static void v1_free_pgtable(struct io_pgtable *iop) 483 { 484 struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); 485 struct protection_domain *dom; 486 LIST_HEAD(freelist); 487 488 if (pgtable->mode == PAGE_MODE_NONE) 489 return; 490 491 dom = container_of(pgtable, struct protection_domain, iop); 492 493 /* Page-table is not visible to IOMMU anymore, so free it */ 494 BUG_ON(pgtable->mode < PAGE_MODE_NONE || 495 pgtable->mode > PAGE_MODE_6_LEVEL); 496 497 free_sub_pt(pgtable->root, pgtable->mode, &freelist); 498 499 /* Update data structure */ 500 amd_iommu_domain_clr_pt_root(dom); 501 502 /* Make changes visible to IOMMUs */ 503 amd_iommu_domain_update(dom); 504 505 put_pages_list(&freelist); 506 } 507 508 static struct io_pgtable *v1_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) 509 { 510 struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); 511 512 cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES, 513 cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, 514 cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, 515 cfg->tlb = &v1_flush_ops; 516 517 pgtable->iop.ops.map = iommu_v1_map_page; 518 pgtable->iop.ops.unmap = iommu_v1_unmap_page; 519 pgtable->iop.ops.iova_to_phys = iommu_v1_iova_to_phys; 520 521 return &pgtable->iop; 522 } 523 524 struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns = { 525 .alloc = v1_alloc_pgtable, 526 .free = v1_free_pgtable, 527 }; 528