1 /* 2 * CPU-agnostic ARM page table allocator. 3 * 4 * ARMv7 Short-descriptor format, supporting 5 * - Basic memory attributes 6 * - Simplified access permissions (AP[2:1] model) 7 * - Backwards-compatible TEX remap 8 * - Large pages/supersections (if indicated by the caller) 9 * 10 * Not supporting: 11 * - Legacy access permissions (AP[2:0] model) 12 * 13 * Almost certainly never supporting: 14 * - PXN 15 * - Domains 16 * 17 * This program is free software; you can redistribute it and/or modify 18 * it under the terms of the GNU General Public License version 2 as 19 * published by the Free Software Foundation. 20 * 21 * This program is distributed in the hope that it will be useful, 22 * but WITHOUT ANY WARRANTY; without even the implied warranty of 23 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 24 * GNU General Public License for more details. 25 * 26 * You should have received a copy of the GNU General Public License 27 * along with this program. If not, see <http://www.gnu.org/licenses/>. 28 * 29 * Copyright (C) 2014-2015 ARM Limited 30 * Copyright (c) 2014-2015 MediaTek Inc. 31 */ 32 33 #define pr_fmt(fmt) "arm-v7s io-pgtable: " fmt 34 35 #include <linux/atomic.h> 36 #include <linux/dma-mapping.h> 37 #include <linux/gfp.h> 38 #include <linux/io-pgtable.h> 39 #include <linux/iommu.h> 40 #include <linux/kernel.h> 41 #include <linux/kmemleak.h> 42 #include <linux/sizes.h> 43 #include <linux/slab.h> 44 #include <linux/spinlock.h> 45 #include <linux/types.h> 46 47 #include <asm/barrier.h> 48 49 /* Struct accessors */ 50 #define io_pgtable_to_data(x) \ 51 container_of((x), struct arm_v7s_io_pgtable, iop) 52 53 #define io_pgtable_ops_to_data(x) \ 54 io_pgtable_to_data(io_pgtable_ops_to_pgtable(x)) 55 56 /* 57 * We have 32 bits total; 12 bits resolved at level 1, 8 bits at level 2, 58 * and 12 bits in a page. With some carefully-chosen coefficients we can 59 * hide the ugly inconsistencies behind these macros and at least let the 60 * rest of the code pretend to be somewhat sane. 61 */ 62 #define ARM_V7S_ADDR_BITS 32 63 #define _ARM_V7S_LVL_BITS(lvl) (16 - (lvl) * 4) 64 #define ARM_V7S_LVL_SHIFT(lvl) (ARM_V7S_ADDR_BITS - (4 + 8 * (lvl))) 65 #define ARM_V7S_TABLE_SHIFT 10 66 67 #define ARM_V7S_PTES_PER_LVL(lvl) (1 << _ARM_V7S_LVL_BITS(lvl)) 68 #define ARM_V7S_TABLE_SIZE(lvl) \ 69 (ARM_V7S_PTES_PER_LVL(lvl) * sizeof(arm_v7s_iopte)) 70 71 #define ARM_V7S_BLOCK_SIZE(lvl) (1UL << ARM_V7S_LVL_SHIFT(lvl)) 72 #define ARM_V7S_LVL_MASK(lvl) ((u32)(~0U << ARM_V7S_LVL_SHIFT(lvl))) 73 #define ARM_V7S_TABLE_MASK ((u32)(~0U << ARM_V7S_TABLE_SHIFT)) 74 #define _ARM_V7S_IDX_MASK(lvl) (ARM_V7S_PTES_PER_LVL(lvl) - 1) 75 #define ARM_V7S_LVL_IDX(addr, lvl) ({ \ 76 int _l = lvl; \ 77 ((u32)(addr) >> ARM_V7S_LVL_SHIFT(_l)) & _ARM_V7S_IDX_MASK(_l); \ 78 }) 79 80 /* 81 * Large page/supersection entries are effectively a block of 16 page/section 82 * entries, along the lines of the LPAE contiguous hint, but all with the 83 * same output address. For want of a better common name we'll call them 84 * "contiguous" versions of their respective page/section entries here, but 85 * noting the distinction (WRT to TLB maintenance) that they represent *one* 86 * entry repeated 16 times, not 16 separate entries (as in the LPAE case). 87 */ 88 #define ARM_V7S_CONT_PAGES 16 89 90 /* PTE type bits: these are all mixed up with XN/PXN bits in most cases */ 91 #define ARM_V7S_PTE_TYPE_TABLE 0x1 92 #define ARM_V7S_PTE_TYPE_PAGE 0x2 93 #define ARM_V7S_PTE_TYPE_CONT_PAGE 0x1 94 95 #define ARM_V7S_PTE_IS_VALID(pte) (((pte) & 0x3) != 0) 96 #define ARM_V7S_PTE_IS_TABLE(pte, lvl) \ 97 ((lvl) == 1 && (((pte) & 0x3) == ARM_V7S_PTE_TYPE_TABLE)) 98 99 /* Page table bits */ 100 #define ARM_V7S_ATTR_XN(lvl) BIT(4 * (2 - (lvl))) 101 #define ARM_V7S_ATTR_B BIT(2) 102 #define ARM_V7S_ATTR_C BIT(3) 103 #define ARM_V7S_ATTR_NS_TABLE BIT(3) 104 #define ARM_V7S_ATTR_NS_SECTION BIT(19) 105 106 #define ARM_V7S_CONT_SECTION BIT(18) 107 #define ARM_V7S_CONT_PAGE_XN_SHIFT 15 108 109 /* 110 * The attribute bits are consistently ordered*, but occupy bits [17:10] of 111 * a level 1 PTE vs. bits [11:4] at level 2. Thus we define the individual 112 * fields relative to that 8-bit block, plus a total shift relative to the PTE. 113 */ 114 #define ARM_V7S_ATTR_SHIFT(lvl) (16 - (lvl) * 6) 115 116 #define ARM_V7S_ATTR_MASK 0xff 117 #define ARM_V7S_ATTR_AP0 BIT(0) 118 #define ARM_V7S_ATTR_AP1 BIT(1) 119 #define ARM_V7S_ATTR_AP2 BIT(5) 120 #define ARM_V7S_ATTR_S BIT(6) 121 #define ARM_V7S_ATTR_NG BIT(7) 122 #define ARM_V7S_TEX_SHIFT 2 123 #define ARM_V7S_TEX_MASK 0x7 124 #define ARM_V7S_ATTR_TEX(val) (((val) & ARM_V7S_TEX_MASK) << ARM_V7S_TEX_SHIFT) 125 126 #define ARM_V7S_ATTR_MTK_4GB BIT(9) /* MTK extend it for 4GB mode */ 127 128 /* *well, except for TEX on level 2 large pages, of course :( */ 129 #define ARM_V7S_CONT_PAGE_TEX_SHIFT 6 130 #define ARM_V7S_CONT_PAGE_TEX_MASK (ARM_V7S_TEX_MASK << ARM_V7S_CONT_PAGE_TEX_SHIFT) 131 132 /* Simplified access permissions */ 133 #define ARM_V7S_PTE_AF ARM_V7S_ATTR_AP0 134 #define ARM_V7S_PTE_AP_UNPRIV ARM_V7S_ATTR_AP1 135 #define ARM_V7S_PTE_AP_RDONLY ARM_V7S_ATTR_AP2 136 137 /* Register bits */ 138 #define ARM_V7S_RGN_NC 0 139 #define ARM_V7S_RGN_WBWA 1 140 #define ARM_V7S_RGN_WT 2 141 #define ARM_V7S_RGN_WB 3 142 143 #define ARM_V7S_PRRR_TYPE_DEVICE 1 144 #define ARM_V7S_PRRR_TYPE_NORMAL 2 145 #define ARM_V7S_PRRR_TR(n, type) (((type) & 0x3) << ((n) * 2)) 146 #define ARM_V7S_PRRR_DS0 BIT(16) 147 #define ARM_V7S_PRRR_DS1 BIT(17) 148 #define ARM_V7S_PRRR_NS0 BIT(18) 149 #define ARM_V7S_PRRR_NS1 BIT(19) 150 #define ARM_V7S_PRRR_NOS(n) BIT((n) + 24) 151 152 #define ARM_V7S_NMRR_IR(n, attr) (((attr) & 0x3) << ((n) * 2)) 153 #define ARM_V7S_NMRR_OR(n, attr) (((attr) & 0x3) << ((n) * 2 + 16)) 154 155 #define ARM_V7S_TTBR_S BIT(1) 156 #define ARM_V7S_TTBR_NOS BIT(5) 157 #define ARM_V7S_TTBR_ORGN_ATTR(attr) (((attr) & 0x3) << 3) 158 #define ARM_V7S_TTBR_IRGN_ATTR(attr) \ 159 ((((attr) & 0x1) << 6) | (((attr) & 0x2) >> 1)) 160 161 #define ARM_V7S_TCR_PD1 BIT(5) 162 163 #ifdef CONFIG_ZONE_DMA32 164 #define ARM_V7S_TABLE_GFP_DMA GFP_DMA32 165 #define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA32 166 #else 167 #define ARM_V7S_TABLE_GFP_DMA GFP_DMA 168 #define ARM_V7S_TABLE_SLAB_FLAGS SLAB_CACHE_DMA 169 #endif 170 171 typedef u32 arm_v7s_iopte; 172 173 static bool selftest_running; 174 175 struct arm_v7s_io_pgtable { 176 struct io_pgtable iop; 177 178 arm_v7s_iopte *pgd; 179 struct kmem_cache *l2_tables; 180 spinlock_t split_lock; 181 }; 182 183 static dma_addr_t __arm_v7s_dma_addr(void *pages) 184 { 185 return (dma_addr_t)virt_to_phys(pages); 186 } 187 188 static arm_v7s_iopte *iopte_deref(arm_v7s_iopte pte, int lvl) 189 { 190 if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) 191 pte &= ARM_V7S_TABLE_MASK; 192 else 193 pte &= ARM_V7S_LVL_MASK(lvl); 194 return phys_to_virt(pte); 195 } 196 197 static void *__arm_v7s_alloc_table(int lvl, gfp_t gfp, 198 struct arm_v7s_io_pgtable *data) 199 { 200 struct io_pgtable_cfg *cfg = &data->iop.cfg; 201 struct device *dev = cfg->iommu_dev; 202 phys_addr_t phys; 203 dma_addr_t dma; 204 size_t size = ARM_V7S_TABLE_SIZE(lvl); 205 void *table = NULL; 206 207 if (lvl == 1) 208 table = (void *)__get_free_pages( 209 __GFP_ZERO | ARM_V7S_TABLE_GFP_DMA, get_order(size)); 210 else if (lvl == 2) 211 table = kmem_cache_zalloc(data->l2_tables, gfp); 212 phys = virt_to_phys(table); 213 if (phys != (arm_v7s_iopte)phys) { 214 /* Doesn't fit in PTE */ 215 dev_err(dev, "Page table does not fit in PTE: %pa", &phys); 216 goto out_free; 217 } 218 if (table && !(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) { 219 dma = dma_map_single(dev, table, size, DMA_TO_DEVICE); 220 if (dma_mapping_error(dev, dma)) 221 goto out_free; 222 /* 223 * We depend on the IOMMU being able to work with any physical 224 * address directly, so if the DMA layer suggests otherwise by 225 * translating or truncating them, that bodes very badly... 226 */ 227 if (dma != phys) 228 goto out_unmap; 229 } 230 if (lvl == 2) 231 kmemleak_ignore(table); 232 return table; 233 234 out_unmap: 235 dev_err(dev, "Cannot accommodate DMA translation for IOMMU page tables\n"); 236 dma_unmap_single(dev, dma, size, DMA_TO_DEVICE); 237 out_free: 238 if (lvl == 1) 239 free_pages((unsigned long)table, get_order(size)); 240 else 241 kmem_cache_free(data->l2_tables, table); 242 return NULL; 243 } 244 245 static void __arm_v7s_free_table(void *table, int lvl, 246 struct arm_v7s_io_pgtable *data) 247 { 248 struct io_pgtable_cfg *cfg = &data->iop.cfg; 249 struct device *dev = cfg->iommu_dev; 250 size_t size = ARM_V7S_TABLE_SIZE(lvl); 251 252 if (!(cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA)) 253 dma_unmap_single(dev, __arm_v7s_dma_addr(table), size, 254 DMA_TO_DEVICE); 255 if (lvl == 1) 256 free_pages((unsigned long)table, get_order(size)); 257 else 258 kmem_cache_free(data->l2_tables, table); 259 } 260 261 static void __arm_v7s_pte_sync(arm_v7s_iopte *ptep, int num_entries, 262 struct io_pgtable_cfg *cfg) 263 { 264 if (cfg->quirks & IO_PGTABLE_QUIRK_NO_DMA) 265 return; 266 267 dma_sync_single_for_device(cfg->iommu_dev, __arm_v7s_dma_addr(ptep), 268 num_entries * sizeof(*ptep), DMA_TO_DEVICE); 269 } 270 static void __arm_v7s_set_pte(arm_v7s_iopte *ptep, arm_v7s_iopte pte, 271 int num_entries, struct io_pgtable_cfg *cfg) 272 { 273 int i; 274 275 for (i = 0; i < num_entries; i++) 276 ptep[i] = pte; 277 278 __arm_v7s_pte_sync(ptep, num_entries, cfg); 279 } 280 281 static arm_v7s_iopte arm_v7s_prot_to_pte(int prot, int lvl, 282 struct io_pgtable_cfg *cfg) 283 { 284 bool ap = !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS); 285 arm_v7s_iopte pte = ARM_V7S_ATTR_NG | ARM_V7S_ATTR_S; 286 287 if (!(prot & IOMMU_MMIO)) 288 pte |= ARM_V7S_ATTR_TEX(1); 289 if (ap) { 290 pte |= ARM_V7S_PTE_AF; 291 if (!(prot & IOMMU_PRIV)) 292 pte |= ARM_V7S_PTE_AP_UNPRIV; 293 if (!(prot & IOMMU_WRITE)) 294 pte |= ARM_V7S_PTE_AP_RDONLY; 295 } 296 pte <<= ARM_V7S_ATTR_SHIFT(lvl); 297 298 if ((prot & IOMMU_NOEXEC) && ap) 299 pte |= ARM_V7S_ATTR_XN(lvl); 300 if (prot & IOMMU_MMIO) 301 pte |= ARM_V7S_ATTR_B; 302 else if (prot & IOMMU_CACHE) 303 pte |= ARM_V7S_ATTR_B | ARM_V7S_ATTR_C; 304 305 pte |= ARM_V7S_PTE_TYPE_PAGE; 306 if (lvl == 1 && (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS)) 307 pte |= ARM_V7S_ATTR_NS_SECTION; 308 309 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB) 310 pte |= ARM_V7S_ATTR_MTK_4GB; 311 312 return pte; 313 } 314 315 static int arm_v7s_pte_to_prot(arm_v7s_iopte pte, int lvl) 316 { 317 int prot = IOMMU_READ; 318 arm_v7s_iopte attr = pte >> ARM_V7S_ATTR_SHIFT(lvl); 319 320 if (!(attr & ARM_V7S_PTE_AP_RDONLY)) 321 prot |= IOMMU_WRITE; 322 if (!(attr & ARM_V7S_PTE_AP_UNPRIV)) 323 prot |= IOMMU_PRIV; 324 if ((attr & (ARM_V7S_TEX_MASK << ARM_V7S_TEX_SHIFT)) == 0) 325 prot |= IOMMU_MMIO; 326 else if (pte & ARM_V7S_ATTR_C) 327 prot |= IOMMU_CACHE; 328 if (pte & ARM_V7S_ATTR_XN(lvl)) 329 prot |= IOMMU_NOEXEC; 330 331 return prot; 332 } 333 334 static arm_v7s_iopte arm_v7s_pte_to_cont(arm_v7s_iopte pte, int lvl) 335 { 336 if (lvl == 1) { 337 pte |= ARM_V7S_CONT_SECTION; 338 } else if (lvl == 2) { 339 arm_v7s_iopte xn = pte & ARM_V7S_ATTR_XN(lvl); 340 arm_v7s_iopte tex = pte & ARM_V7S_CONT_PAGE_TEX_MASK; 341 342 pte ^= xn | tex | ARM_V7S_PTE_TYPE_PAGE; 343 pte |= (xn << ARM_V7S_CONT_PAGE_XN_SHIFT) | 344 (tex << ARM_V7S_CONT_PAGE_TEX_SHIFT) | 345 ARM_V7S_PTE_TYPE_CONT_PAGE; 346 } 347 return pte; 348 } 349 350 static arm_v7s_iopte arm_v7s_cont_to_pte(arm_v7s_iopte pte, int lvl) 351 { 352 if (lvl == 1) { 353 pte &= ~ARM_V7S_CONT_SECTION; 354 } else if (lvl == 2) { 355 arm_v7s_iopte xn = pte & BIT(ARM_V7S_CONT_PAGE_XN_SHIFT); 356 arm_v7s_iopte tex = pte & (ARM_V7S_CONT_PAGE_TEX_MASK << 357 ARM_V7S_CONT_PAGE_TEX_SHIFT); 358 359 pte ^= xn | tex | ARM_V7S_PTE_TYPE_CONT_PAGE; 360 pte |= (xn >> ARM_V7S_CONT_PAGE_XN_SHIFT) | 361 (tex >> ARM_V7S_CONT_PAGE_TEX_SHIFT) | 362 ARM_V7S_PTE_TYPE_PAGE; 363 } 364 return pte; 365 } 366 367 static bool arm_v7s_pte_is_cont(arm_v7s_iopte pte, int lvl) 368 { 369 if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte, lvl)) 370 return pte & ARM_V7S_CONT_SECTION; 371 else if (lvl == 2) 372 return !(pte & ARM_V7S_PTE_TYPE_PAGE); 373 return false; 374 } 375 376 static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *, unsigned long, 377 size_t, int, arm_v7s_iopte *); 378 379 static int arm_v7s_init_pte(struct arm_v7s_io_pgtable *data, 380 unsigned long iova, phys_addr_t paddr, int prot, 381 int lvl, int num_entries, arm_v7s_iopte *ptep) 382 { 383 struct io_pgtable_cfg *cfg = &data->iop.cfg; 384 arm_v7s_iopte pte; 385 int i; 386 387 for (i = 0; i < num_entries; i++) 388 if (ARM_V7S_PTE_IS_TABLE(ptep[i], lvl)) { 389 /* 390 * We need to unmap and free the old table before 391 * overwriting it with a block entry. 392 */ 393 arm_v7s_iopte *tblp; 394 size_t sz = ARM_V7S_BLOCK_SIZE(lvl); 395 396 tblp = ptep - ARM_V7S_LVL_IDX(iova, lvl); 397 if (WARN_ON(__arm_v7s_unmap(data, iova + i * sz, 398 sz, lvl, tblp) != sz)) 399 return -EINVAL; 400 } else if (ptep[i]) { 401 /* We require an unmap first */ 402 WARN_ON(!selftest_running); 403 return -EEXIST; 404 } 405 406 pte = arm_v7s_prot_to_pte(prot, lvl, cfg); 407 if (num_entries > 1) 408 pte = arm_v7s_pte_to_cont(pte, lvl); 409 410 pte |= paddr & ARM_V7S_LVL_MASK(lvl); 411 412 __arm_v7s_set_pte(ptep, pte, num_entries, cfg); 413 return 0; 414 } 415 416 static arm_v7s_iopte arm_v7s_install_table(arm_v7s_iopte *table, 417 arm_v7s_iopte *ptep, 418 arm_v7s_iopte curr, 419 struct io_pgtable_cfg *cfg) 420 { 421 arm_v7s_iopte old, new; 422 423 new = virt_to_phys(table) | ARM_V7S_PTE_TYPE_TABLE; 424 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_NS) 425 new |= ARM_V7S_ATTR_NS_TABLE; 426 427 /* 428 * Ensure the table itself is visible before its PTE can be. 429 * Whilst we could get away with cmpxchg64_release below, this 430 * doesn't have any ordering semantics when !CONFIG_SMP. 431 */ 432 dma_wmb(); 433 434 old = cmpxchg_relaxed(ptep, curr, new); 435 __arm_v7s_pte_sync(ptep, 1, cfg); 436 437 return old; 438 } 439 440 static int __arm_v7s_map(struct arm_v7s_io_pgtable *data, unsigned long iova, 441 phys_addr_t paddr, size_t size, int prot, 442 int lvl, arm_v7s_iopte *ptep) 443 { 444 struct io_pgtable_cfg *cfg = &data->iop.cfg; 445 arm_v7s_iopte pte, *cptep; 446 int num_entries = size >> ARM_V7S_LVL_SHIFT(lvl); 447 448 /* Find our entry at the current level */ 449 ptep += ARM_V7S_LVL_IDX(iova, lvl); 450 451 /* If we can install a leaf entry at this level, then do so */ 452 if (num_entries) 453 return arm_v7s_init_pte(data, iova, paddr, prot, 454 lvl, num_entries, ptep); 455 456 /* We can't allocate tables at the final level */ 457 if (WARN_ON(lvl == 2)) 458 return -EINVAL; 459 460 /* Grab a pointer to the next level */ 461 pte = READ_ONCE(*ptep); 462 if (!pte) { 463 cptep = __arm_v7s_alloc_table(lvl + 1, GFP_ATOMIC, data); 464 if (!cptep) 465 return -ENOMEM; 466 467 pte = arm_v7s_install_table(cptep, ptep, 0, cfg); 468 if (pte) 469 __arm_v7s_free_table(cptep, lvl + 1, data); 470 } else { 471 /* We've no easy way of knowing if it's synced yet, so... */ 472 __arm_v7s_pte_sync(ptep, 1, cfg); 473 } 474 475 if (ARM_V7S_PTE_IS_TABLE(pte, lvl)) { 476 cptep = iopte_deref(pte, lvl); 477 } else if (pte) { 478 /* We require an unmap first */ 479 WARN_ON(!selftest_running); 480 return -EEXIST; 481 } 482 483 /* Rinse, repeat */ 484 return __arm_v7s_map(data, iova, paddr, size, prot, lvl + 1, cptep); 485 } 486 487 static int arm_v7s_map(struct io_pgtable_ops *ops, unsigned long iova, 488 phys_addr_t paddr, size_t size, int prot) 489 { 490 struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); 491 struct io_pgtable *iop = &data->iop; 492 int ret; 493 494 /* If no access, then nothing to do */ 495 if (!(prot & (IOMMU_READ | IOMMU_WRITE))) 496 return 0; 497 498 if (WARN_ON(upper_32_bits(iova) || upper_32_bits(paddr))) 499 return -ERANGE; 500 501 ret = __arm_v7s_map(data, iova, paddr, size, prot, 1, data->pgd); 502 /* 503 * Synchronise all PTE updates for the new mapping before there's 504 * a chance for anything to kick off a table walk for the new iova. 505 */ 506 if (iop->cfg.quirks & IO_PGTABLE_QUIRK_TLBI_ON_MAP) { 507 io_pgtable_tlb_add_flush(iop, iova, size, 508 ARM_V7S_BLOCK_SIZE(2), false); 509 io_pgtable_tlb_sync(iop); 510 } else { 511 wmb(); 512 } 513 514 return ret; 515 } 516 517 static void arm_v7s_free_pgtable(struct io_pgtable *iop) 518 { 519 struct arm_v7s_io_pgtable *data = io_pgtable_to_data(iop); 520 int i; 521 522 for (i = 0; i < ARM_V7S_PTES_PER_LVL(1); i++) { 523 arm_v7s_iopte pte = data->pgd[i]; 524 525 if (ARM_V7S_PTE_IS_TABLE(pte, 1)) 526 __arm_v7s_free_table(iopte_deref(pte, 1), 2, data); 527 } 528 __arm_v7s_free_table(data->pgd, 1, data); 529 kmem_cache_destroy(data->l2_tables); 530 kfree(data); 531 } 532 533 static arm_v7s_iopte arm_v7s_split_cont(struct arm_v7s_io_pgtable *data, 534 unsigned long iova, int idx, int lvl, 535 arm_v7s_iopte *ptep) 536 { 537 struct io_pgtable *iop = &data->iop; 538 arm_v7s_iopte pte; 539 size_t size = ARM_V7S_BLOCK_SIZE(lvl); 540 int i; 541 542 /* Check that we didn't lose a race to get the lock */ 543 pte = *ptep; 544 if (!arm_v7s_pte_is_cont(pte, lvl)) 545 return pte; 546 547 ptep -= idx & (ARM_V7S_CONT_PAGES - 1); 548 pte = arm_v7s_cont_to_pte(pte, lvl); 549 for (i = 0; i < ARM_V7S_CONT_PAGES; i++) 550 ptep[i] = pte + i * size; 551 552 __arm_v7s_pte_sync(ptep, ARM_V7S_CONT_PAGES, &iop->cfg); 553 554 size *= ARM_V7S_CONT_PAGES; 555 io_pgtable_tlb_add_flush(iop, iova, size, size, true); 556 io_pgtable_tlb_sync(iop); 557 return pte; 558 } 559 560 static size_t arm_v7s_split_blk_unmap(struct arm_v7s_io_pgtable *data, 561 unsigned long iova, size_t size, 562 arm_v7s_iopte blk_pte, 563 arm_v7s_iopte *ptep) 564 { 565 struct io_pgtable_cfg *cfg = &data->iop.cfg; 566 arm_v7s_iopte pte, *tablep; 567 int i, unmap_idx, num_entries, num_ptes; 568 569 tablep = __arm_v7s_alloc_table(2, GFP_ATOMIC, data); 570 if (!tablep) 571 return 0; /* Bytes unmapped */ 572 573 num_ptes = ARM_V7S_PTES_PER_LVL(2); 574 num_entries = size >> ARM_V7S_LVL_SHIFT(2); 575 unmap_idx = ARM_V7S_LVL_IDX(iova, 2); 576 577 pte = arm_v7s_prot_to_pte(arm_v7s_pte_to_prot(blk_pte, 1), 2, cfg); 578 if (num_entries > 1) 579 pte = arm_v7s_pte_to_cont(pte, 2); 580 581 for (i = 0; i < num_ptes; i += num_entries, pte += size) { 582 /* Unmap! */ 583 if (i == unmap_idx) 584 continue; 585 586 __arm_v7s_set_pte(&tablep[i], pte, num_entries, cfg); 587 } 588 589 pte = arm_v7s_install_table(tablep, ptep, blk_pte, cfg); 590 if (pte != blk_pte) { 591 __arm_v7s_free_table(tablep, 2, data); 592 593 if (!ARM_V7S_PTE_IS_TABLE(pte, 1)) 594 return 0; 595 596 tablep = iopte_deref(pte, 1); 597 return __arm_v7s_unmap(data, iova, size, 2, tablep); 598 } 599 600 io_pgtable_tlb_add_flush(&data->iop, iova, size, size, true); 601 io_pgtable_tlb_sync(&data->iop); 602 return size; 603 } 604 605 static size_t __arm_v7s_unmap(struct arm_v7s_io_pgtable *data, 606 unsigned long iova, size_t size, int lvl, 607 arm_v7s_iopte *ptep) 608 { 609 arm_v7s_iopte pte[ARM_V7S_CONT_PAGES]; 610 struct io_pgtable *iop = &data->iop; 611 int idx, i = 0, num_entries = size >> ARM_V7S_LVL_SHIFT(lvl); 612 613 /* Something went horribly wrong and we ran out of page table */ 614 if (WARN_ON(lvl > 2)) 615 return 0; 616 617 idx = ARM_V7S_LVL_IDX(iova, lvl); 618 ptep += idx; 619 do { 620 pte[i] = READ_ONCE(ptep[i]); 621 if (WARN_ON(!ARM_V7S_PTE_IS_VALID(pte[i]))) 622 return 0; 623 } while (++i < num_entries); 624 625 /* 626 * If we've hit a contiguous 'large page' entry at this level, it 627 * needs splitting first, unless we're unmapping the whole lot. 628 * 629 * For splitting, we can't rewrite 16 PTEs atomically, and since we 630 * can't necessarily assume TEX remap we don't have a software bit to 631 * mark live entries being split. In practice (i.e. DMA API code), we 632 * will never be splitting large pages anyway, so just wrap this edge 633 * case in a lock for the sake of correctness and be done with it. 634 */ 635 if (num_entries <= 1 && arm_v7s_pte_is_cont(pte[0], lvl)) { 636 unsigned long flags; 637 638 spin_lock_irqsave(&data->split_lock, flags); 639 pte[0] = arm_v7s_split_cont(data, iova, idx, lvl, ptep); 640 spin_unlock_irqrestore(&data->split_lock, flags); 641 } 642 643 /* If the size matches this level, we're in the right place */ 644 if (num_entries) { 645 size_t blk_size = ARM_V7S_BLOCK_SIZE(lvl); 646 647 __arm_v7s_set_pte(ptep, 0, num_entries, &iop->cfg); 648 649 for (i = 0; i < num_entries; i++) { 650 if (ARM_V7S_PTE_IS_TABLE(pte[i], lvl)) { 651 /* Also flush any partial walks */ 652 io_pgtable_tlb_add_flush(iop, iova, blk_size, 653 ARM_V7S_BLOCK_SIZE(lvl + 1), false); 654 io_pgtable_tlb_sync(iop); 655 ptep = iopte_deref(pte[i], lvl); 656 __arm_v7s_free_table(ptep, lvl + 1, data); 657 } else if (iop->cfg.quirks & IO_PGTABLE_QUIRK_NON_STRICT) { 658 /* 659 * Order the PTE update against queueing the IOVA, to 660 * guarantee that a flush callback from a different CPU 661 * has observed it before the TLBIALL can be issued. 662 */ 663 smp_wmb(); 664 } else { 665 io_pgtable_tlb_add_flush(iop, iova, blk_size, 666 blk_size, true); 667 } 668 iova += blk_size; 669 } 670 return size; 671 } else if (lvl == 1 && !ARM_V7S_PTE_IS_TABLE(pte[0], lvl)) { 672 /* 673 * Insert a table at the next level to map the old region, 674 * minus the part we want to unmap 675 */ 676 return arm_v7s_split_blk_unmap(data, iova, size, pte[0], ptep); 677 } 678 679 /* Keep on walkin' */ 680 ptep = iopte_deref(pte[0], lvl); 681 return __arm_v7s_unmap(data, iova, size, lvl + 1, ptep); 682 } 683 684 static size_t arm_v7s_unmap(struct io_pgtable_ops *ops, unsigned long iova, 685 size_t size) 686 { 687 struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); 688 689 if (WARN_ON(upper_32_bits(iova))) 690 return 0; 691 692 return __arm_v7s_unmap(data, iova, size, 1, data->pgd); 693 } 694 695 static phys_addr_t arm_v7s_iova_to_phys(struct io_pgtable_ops *ops, 696 unsigned long iova) 697 { 698 struct arm_v7s_io_pgtable *data = io_pgtable_ops_to_data(ops); 699 arm_v7s_iopte *ptep = data->pgd, pte; 700 int lvl = 0; 701 u32 mask; 702 703 do { 704 ptep += ARM_V7S_LVL_IDX(iova, ++lvl); 705 pte = READ_ONCE(*ptep); 706 ptep = iopte_deref(pte, lvl); 707 } while (ARM_V7S_PTE_IS_TABLE(pte, lvl)); 708 709 if (!ARM_V7S_PTE_IS_VALID(pte)) 710 return 0; 711 712 mask = ARM_V7S_LVL_MASK(lvl); 713 if (arm_v7s_pte_is_cont(pte, lvl)) 714 mask *= ARM_V7S_CONT_PAGES; 715 return (pte & mask) | (iova & ~mask); 716 } 717 718 static struct io_pgtable *arm_v7s_alloc_pgtable(struct io_pgtable_cfg *cfg, 719 void *cookie) 720 { 721 struct arm_v7s_io_pgtable *data; 722 723 if (cfg->ias > ARM_V7S_ADDR_BITS || cfg->oas > ARM_V7S_ADDR_BITS) 724 return NULL; 725 726 if (cfg->quirks & ~(IO_PGTABLE_QUIRK_ARM_NS | 727 IO_PGTABLE_QUIRK_NO_PERMS | 728 IO_PGTABLE_QUIRK_TLBI_ON_MAP | 729 IO_PGTABLE_QUIRK_ARM_MTK_4GB | 730 IO_PGTABLE_QUIRK_NO_DMA | 731 IO_PGTABLE_QUIRK_NON_STRICT)) 732 return NULL; 733 734 /* If ARM_MTK_4GB is enabled, the NO_PERMS is also expected. */ 735 if (cfg->quirks & IO_PGTABLE_QUIRK_ARM_MTK_4GB && 736 !(cfg->quirks & IO_PGTABLE_QUIRK_NO_PERMS)) 737 return NULL; 738 739 data = kmalloc(sizeof(*data), GFP_KERNEL); 740 if (!data) 741 return NULL; 742 743 spin_lock_init(&data->split_lock); 744 data->l2_tables = kmem_cache_create("io-pgtable_armv7s_l2", 745 ARM_V7S_TABLE_SIZE(2), 746 ARM_V7S_TABLE_SIZE(2), 747 ARM_V7S_TABLE_SLAB_FLAGS, NULL); 748 if (!data->l2_tables) 749 goto out_free_data; 750 751 data->iop.ops = (struct io_pgtable_ops) { 752 .map = arm_v7s_map, 753 .unmap = arm_v7s_unmap, 754 .iova_to_phys = arm_v7s_iova_to_phys, 755 }; 756 757 /* We have to do this early for __arm_v7s_alloc_table to work... */ 758 data->iop.cfg = *cfg; 759 760 /* 761 * Unless the IOMMU driver indicates supersection support by 762 * having SZ_16M set in the initial bitmap, they won't be used. 763 */ 764 cfg->pgsize_bitmap &= SZ_4K | SZ_64K | SZ_1M | SZ_16M; 765 766 /* TCR: T0SZ=0, disable TTBR1 */ 767 cfg->arm_v7s_cfg.tcr = ARM_V7S_TCR_PD1; 768 769 /* 770 * TEX remap: the indices used map to the closest equivalent types 771 * under the non-TEX-remap interpretation of those attribute bits, 772 * excepting various implementation-defined aspects of shareability. 773 */ 774 cfg->arm_v7s_cfg.prrr = ARM_V7S_PRRR_TR(1, ARM_V7S_PRRR_TYPE_DEVICE) | 775 ARM_V7S_PRRR_TR(4, ARM_V7S_PRRR_TYPE_NORMAL) | 776 ARM_V7S_PRRR_TR(7, ARM_V7S_PRRR_TYPE_NORMAL) | 777 ARM_V7S_PRRR_DS0 | ARM_V7S_PRRR_DS1 | 778 ARM_V7S_PRRR_NS1 | ARM_V7S_PRRR_NOS(7); 779 cfg->arm_v7s_cfg.nmrr = ARM_V7S_NMRR_IR(7, ARM_V7S_RGN_WBWA) | 780 ARM_V7S_NMRR_OR(7, ARM_V7S_RGN_WBWA); 781 782 /* Looking good; allocate a pgd */ 783 data->pgd = __arm_v7s_alloc_table(1, GFP_KERNEL, data); 784 if (!data->pgd) 785 goto out_free_data; 786 787 /* Ensure the empty pgd is visible before any actual TTBR write */ 788 wmb(); 789 790 /* TTBRs */ 791 cfg->arm_v7s_cfg.ttbr[0] = virt_to_phys(data->pgd) | 792 ARM_V7S_TTBR_S | ARM_V7S_TTBR_NOS | 793 ARM_V7S_TTBR_IRGN_ATTR(ARM_V7S_RGN_WBWA) | 794 ARM_V7S_TTBR_ORGN_ATTR(ARM_V7S_RGN_WBWA); 795 cfg->arm_v7s_cfg.ttbr[1] = 0; 796 return &data->iop; 797 798 out_free_data: 799 kmem_cache_destroy(data->l2_tables); 800 kfree(data); 801 return NULL; 802 } 803 804 struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns = { 805 .alloc = arm_v7s_alloc_pgtable, 806 .free = arm_v7s_free_pgtable, 807 }; 808 809 #ifdef CONFIG_IOMMU_IO_PGTABLE_ARMV7S_SELFTEST 810 811 static struct io_pgtable_cfg *cfg_cookie; 812 813 static void dummy_tlb_flush_all(void *cookie) 814 { 815 WARN_ON(cookie != cfg_cookie); 816 } 817 818 static void dummy_tlb_add_flush(unsigned long iova, size_t size, 819 size_t granule, bool leaf, void *cookie) 820 { 821 WARN_ON(cookie != cfg_cookie); 822 WARN_ON(!(size & cfg_cookie->pgsize_bitmap)); 823 } 824 825 static void dummy_tlb_sync(void *cookie) 826 { 827 WARN_ON(cookie != cfg_cookie); 828 } 829 830 static const struct iommu_gather_ops dummy_tlb_ops = { 831 .tlb_flush_all = dummy_tlb_flush_all, 832 .tlb_add_flush = dummy_tlb_add_flush, 833 .tlb_sync = dummy_tlb_sync, 834 }; 835 836 #define __FAIL(ops) ({ \ 837 WARN(1, "selftest: test failed\n"); \ 838 selftest_running = false; \ 839 -EFAULT; \ 840 }) 841 842 static int __init arm_v7s_do_selftests(void) 843 { 844 struct io_pgtable_ops *ops; 845 struct io_pgtable_cfg cfg = { 846 .tlb = &dummy_tlb_ops, 847 .oas = 32, 848 .ias = 32, 849 .quirks = IO_PGTABLE_QUIRK_ARM_NS | IO_PGTABLE_QUIRK_NO_DMA, 850 .pgsize_bitmap = SZ_4K | SZ_64K | SZ_1M | SZ_16M, 851 }; 852 unsigned int iova, size, iova_start; 853 unsigned int i, loopnr = 0; 854 855 selftest_running = true; 856 857 cfg_cookie = &cfg; 858 859 ops = alloc_io_pgtable_ops(ARM_V7S, &cfg, &cfg); 860 if (!ops) { 861 pr_err("selftest: failed to allocate io pgtable ops\n"); 862 return -EINVAL; 863 } 864 865 /* 866 * Initial sanity checks. 867 * Empty page tables shouldn't provide any translations. 868 */ 869 if (ops->iova_to_phys(ops, 42)) 870 return __FAIL(ops); 871 872 if (ops->iova_to_phys(ops, SZ_1G + 42)) 873 return __FAIL(ops); 874 875 if (ops->iova_to_phys(ops, SZ_2G + 42)) 876 return __FAIL(ops); 877 878 /* 879 * Distinct mappings of different granule sizes. 880 */ 881 iova = 0; 882 for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { 883 size = 1UL << i; 884 if (ops->map(ops, iova, iova, size, IOMMU_READ | 885 IOMMU_WRITE | 886 IOMMU_NOEXEC | 887 IOMMU_CACHE)) 888 return __FAIL(ops); 889 890 /* Overlapping mappings */ 891 if (!ops->map(ops, iova, iova + size, size, 892 IOMMU_READ | IOMMU_NOEXEC)) 893 return __FAIL(ops); 894 895 if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) 896 return __FAIL(ops); 897 898 iova += SZ_16M; 899 loopnr++; 900 } 901 902 /* Partial unmap */ 903 i = 1; 904 size = 1UL << __ffs(cfg.pgsize_bitmap); 905 while (i < loopnr) { 906 iova_start = i * SZ_16M; 907 if (ops->unmap(ops, iova_start + size, size) != size) 908 return __FAIL(ops); 909 910 /* Remap of partial unmap */ 911 if (ops->map(ops, iova_start + size, size, size, IOMMU_READ)) 912 return __FAIL(ops); 913 914 if (ops->iova_to_phys(ops, iova_start + size + 42) 915 != (size + 42)) 916 return __FAIL(ops); 917 i++; 918 } 919 920 /* Full unmap */ 921 iova = 0; 922 for_each_set_bit(i, &cfg.pgsize_bitmap, BITS_PER_LONG) { 923 size = 1UL << i; 924 925 if (ops->unmap(ops, iova, size) != size) 926 return __FAIL(ops); 927 928 if (ops->iova_to_phys(ops, iova + 42)) 929 return __FAIL(ops); 930 931 /* Remap full block */ 932 if (ops->map(ops, iova, iova, size, IOMMU_WRITE)) 933 return __FAIL(ops); 934 935 if (ops->iova_to_phys(ops, iova + 42) != (iova + 42)) 936 return __FAIL(ops); 937 938 iova += SZ_16M; 939 } 940 941 free_io_pgtable_ops(ops); 942 943 selftest_running = false; 944 945 pr_info("self test ok\n"); 946 return 0; 947 } 948 subsys_initcall(arm_v7s_do_selftests); 949 #endif 950