// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * arch/powerpc/sysdev/dart_iommu.c
 *
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 * Copyright (C) 2005 Benjamin Herrenschmidt <benh@kernel.crashing.org>,
 *                    IBM Corporation
 *
 * Based on pSeries_iommu.c:
 * Copyright (C) 2001 Mike Corrigan & Dave Engebretsen, IBM Corporation
 * Copyright (C) 2004 Olof Johansson <olof@lixom.net>, IBM Corporation
 *
 * Dynamic DMA mapping support, Apple U3, U4 & IBM CPC925 "DART" iommu.
 */

#include <linux/init.h>
#include <linux/types.h>
#include <linux/mm.h>
#include <linux/spinlock.h>
#include <linux/string.h>
#include <linux/pci.h>
#include <linux/dma-mapping.h>
#include <linux/vmalloc.h>
#include <linux/suspend.h>
#include <linux/memblock.h>
#include <linux/gfp.h>
#include <linux/kmemleak.h>
#include <asm/io.h>
#include <asm/prom.h>
#include <asm/iommu.h>
#include <asm/pci-bridge.h>
#include <asm/machdep.h>
#include <asm/cacheflush.h>
#include <asm/ppc-pci.h>

#include "dart.h"

/* DART table address and size */
static u32 *dart_tablebase;
static unsigned long dart_tablesize;

/* Mapped base address for the dart */
static unsigned int __iomem *dart;

/* Dummy val that entries are set to when unused */
static unsigned int dart_emptyval;

static struct iommu_table iommu_table_dart;
static int iommu_table_dart_inited;
static int dart_dirty;
static int dart_is_u4;

#define DART_U4_BYPASS_BASE	0x8000000000ull

#define DBG(...)

static DEFINE_SPINLOCK(invalidate_lock);

static inline void dart_tlb_invalidate_all(void)
{
	unsigned long l = 0;
	unsigned int reg, inv_bit;
	unsigned long limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	DBG("dart: flush\n");

	/* To invalidate the DART, set the DARTCNTL_FLUSHTLB bit in the
	 * control register and wait for it to clear.
	 *
	 * Gotcha: Sometimes, the DART won't detect that the bit gets
	 * set. If so, clear it and set it again.
	 */

	limit = 0;

	inv_bit = dart_is_u4 ? DART_CNTL_U4_FLUSHTLB : DART_CNTL_U3_FLUSHTLB;
retry:
	l = 0;
	reg = DART_IN(DART_CNTL);
	reg |= inv_bit;
	DART_OUT(DART_CNTL, reg);

	while ((DART_IN(DART_CNTL) & inv_bit) && l < (1L << limit))
		l++;
	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			reg = DART_IN(DART_CNTL);
			reg &= ~inv_bit;
			DART_OUT(DART_CNTL, reg);
			goto retry;
		} else
			panic("DART: TLB did not flush after waiting a long "
			      "time. Buggy U3 ?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}

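/*
 * Invalidate a single DART TLB entry. Only used on U4, which accepts the
 * RPN of the entry to drop in the IONE field of the control register.
 */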
static inline void dart_tlb_invalidate_one(unsigned long bus_rpn)
{
	unsigned int reg;
	unsigned int l, limit;
	unsigned long flags;

	spin_lock_irqsave(&invalidate_lock, flags);

	reg = DART_CNTL_U4_ENABLE | DART_CNTL_U4_IONE |
		(bus_rpn & DART_CNTL_U4_IONE_MASK);
	DART_OUT(DART_CNTL, reg);

	limit = 0;
wait_more:
	l = 0;
	while ((DART_IN(DART_CNTL) & DART_CNTL_U4_IONE) && l < (1L << limit)) {
		rmb();
		l++;
	}

	if (l == (1L << limit)) {
		if (limit < 4) {
			limit++;
			goto wait_more;
		} else
			panic("DART: TLB did not flush after waiting a long "
			      "time. Buggy U4 ?");
	}

	spin_unlock_irqrestore(&invalidate_lock, flags);
}

static void dart_cache_sync(unsigned int *base, unsigned int count)
{
	/*
	 * We add 1 to the number of entries to flush, following a
	 * comment in Darwin indicating that the memory controller
	 * can prefetch unmapped memory under some circumstances.
	 */
	unsigned long start = (unsigned long)base;
	unsigned long end = start + (count + 1) * sizeof(unsigned int);
	unsigned int tmp;

	/* Perform a standard cache flush */
	flush_dcache_range(start, end);

	/*
	 * Perform the sequence described in the CPC925 manual to
	 * ensure all the data gets to a point the cache incoherent
	 * DART hardware will see.
	 */
	asm volatile(" sync;"
		     " isync;"
		     " dcbf 0,%1;"
		     " sync;"
		     " isync;"
		     " lwz %0,0(%1);"
		     " isync" : "=r" (tmp) : "r" (end) : "memory");
}

static void dart_flush(struct iommu_table *tbl)
{
	mb();
	if (dart_dirty) {
		dart_tlb_invalidate_all();
		dart_dirty = 0;
	}
}

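/*
 * Fill npages consecutive DART entries starting at index with the RPNs
 * backing uaddr, push them past the cache, then invalidate the IOTLB:
 * one entry at a time on U4, or by marking the table dirty so the next
 * flush does a full invalidation on U3.
 */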
static int dart_build(struct iommu_table *tbl, long index,
		      long npages, unsigned long uaddr,
		      enum dma_data_direction direction,
		      unsigned long attrs)
{
	unsigned int *dp, *orig_dp;
	unsigned int rpn;
	long l;

	DBG("dart: build at: %lx, %lx, addr: %x\n", index, npages, uaddr);

	orig_dp = dp = ((unsigned int *)tbl->it_base) + index;

	/* On U3, all memory is contiguous, so we can move this
	 * out of the loop.
	 */
	l = npages;
	while (l--) {
		rpn = __pa(uaddr) >> DART_PAGE_SHIFT;

		*(dp++) = DARTMAP_VALID | (rpn & DARTMAP_RPNMASK);

		uaddr += DART_PAGE_SIZE;
	}
	dart_cache_sync(orig_dp, npages);

	if (dart_is_u4) {
		rpn = index;
		while (npages--)
			dart_tlb_invalidate_one(rpn++);
	} else {
		dart_dirty = 1;
	}
	return 0;
}

static void dart_free(struct iommu_table *tbl, long index, long npages)
{
	unsigned int *dp, *orig_dp;
	long orig_npages = npages;

	/* We don't worry about flushing the TLB cache. The only drawback of
	 * not doing it is that we won't catch buggy device drivers doing
	 * bad DMAs, but then no 32-bit architecture ever does either.
	 */

	DBG("dart: free at: %lx, %lx\n", index, npages);

	orig_dp = dp = ((unsigned int *)tbl->it_base) + index;

	while (npages--)
		*(dp++) = dart_emptyval;

	dart_cache_sync(orig_dp, orig_npages);
}

static void __init allocate_dart(void)
{
	unsigned long tmp;

	/* 512 pages (2MB) is max DART tablesize. */
	dart_tablesize = 1UL << 21;

	/*
	 * 16MB (1 << 24) alignment. We allocate a full 16MB chunk since we
	 * will blow up an entire large page anyway in the kernel mapping.
	 */
	dart_tablebase = memblock_alloc_try_nid_raw(SZ_16M, SZ_16M,
						    MEMBLOCK_LOW_LIMIT, SZ_2G,
						    NUMA_NO_NODE);
	if (!dart_tablebase)
		panic("Failed to allocate 16MB below 2GB for DART table\n");

	/* There is no point scanning the DART space for leaks */
	kmemleak_no_scan((void *)dart_tablebase);

	/* Allocate a spare page to map all invalid DART pages. We need to do
	 * that to work around what looks like a problem with the HT bridge
	 * prefetching into invalid pages and corrupting data.
	 */
	tmp = memblock_phys_alloc(DART_PAGE_SIZE, DART_PAGE_SIZE);
	if (!tmp)
		panic("DART: table allocation failed\n");

	dart_emptyval = DARTMAP_VALID | ((tmp >> DART_PAGE_SHIFT) &
					 DARTMAP_RPNMASK);

	printk(KERN_INFO "DART table allocated at: %p\n", dart_tablebase);
}

static int __init dart_init(struct device_node *dart_node)
{
	unsigned int i;
	unsigned long base, size;
	struct resource r;

	/* IOMMU disabled by the user ? bail out */
	if (iommu_is_off)
		return -ENODEV;

	/*
	 * Only use the DART if the machine has more than 1GB of RAM
	 * or if requested with iommu=on on cmdline.
	 *
	 * 1GB of RAM is picked as limit because some default devices
	 * (e.g. Airport Extreme) have 30 bit address range limits.
	 */

	if (!iommu_force_on && memblock_end_of_DRAM() <= 0x40000000ull)
		return -ENODEV;

	/* Get DART registers */
	if (of_address_to_resource(dart_node, 0, &r))
		panic("DART: can't get register base ! ");

	/* Map in DART registers */
	dart = ioremap(r.start, resource_size(&r));
	if (dart == NULL)
		panic("DART: Cannot map registers!");

	/* Allocate the DART and dummy page */
	allocate_dart();

	/* Fill initial table */
	for (i = 0; i < dart_tablesize/4; i++)
		dart_tablebase[i] = dart_emptyval;

	/* Push to memory */
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));

	/* Initialize DART with table base and enable it. */
	base = ((unsigned long)dart_tablebase) >> DART_PAGE_SHIFT;
	size = dart_tablesize >> DART_PAGE_SHIFT;
	if (dart_is_u4) {
		size &= DART_SIZE_U4_SIZE_MASK;
		DART_OUT(DART_BASE_U4, base);
		DART_OUT(DART_SIZE_U4, size);
		DART_OUT(DART_CNTL, DART_CNTL_U4_ENABLE);
	} else {
		size &= DART_CNTL_U3_SIZE_MASK;
		DART_OUT(DART_CNTL,
			 DART_CNTL_U3_ENABLE |
			 (base << DART_CNTL_U3_BASE_SHIFT) |
			 (size << DART_CNTL_U3_SIZE_SHIFT));
	}

	/* Invalidate DART to get rid of possible stale TLBs */
	dart_tlb_invalidate_all();

	printk(KERN_INFO "DART IOMMU initialized for %s type chipset\n",
	       dart_is_u4 ? "U4" : "U3");

	return 0;
}

static struct iommu_table_ops iommu_dart_ops = {
	.set = dart_build,
	.clear = dart_free,
	.flush = dart_flush,
};

static void iommu_table_dart_setup(void)
{
	iommu_table_dart.it_busno = 0;
	iommu_table_dart.it_offset = 0;
	/* it_size is in number of entries */
	iommu_table_dart.it_size = dart_tablesize / sizeof(u32);
	iommu_table_dart.it_page_shift = IOMMU_PAGE_SHIFT_4K;

	/* Initialize the common IOMMU code */
	iommu_table_dart.it_base = (unsigned long)dart_tablebase;
	iommu_table_dart.it_index = 0;
	iommu_table_dart.it_blocksize = 1;
	iommu_table_dart.it_ops = &iommu_dart_ops;
	if (!iommu_init_table(&iommu_table_dart, -1, 0, 0))
		panic("Failed to initialize iommu table");

	/* Reserve the last page of the DART to avoid possible prefetch
	 * past the DART mapped area
	 */
	set_bit(iommu_table_dart.it_size - 1, iommu_table_dart.it_map);
}

static void pci_dma_bus_setup_dart(struct pci_bus *bus)
{
	if (!iommu_table_dart_inited) {
		iommu_table_dart_inited = 1;
		iommu_table_dart_setup();
	}
}

static bool dart_device_on_pcie(struct device *dev)
{
	struct device_node *np = of_node_get(dev->of_node);

	while (np) {
		if (of_device_is_compatible(np, "U4-pcie") ||
		    of_device_is_compatible(np, "u4-pcie")) {
			of_node_put(np);
			return true;
		}
		np = of_get_next_parent(np);
	}
	return false;
}

static void pci_dma_dev_setup_dart(struct pci_dev *dev)
{
	if (dart_is_u4 && dart_device_on_pcie(&dev->dev))
		dev->dev.archdata.dma_offset = DART_U4_BYPASS_BASE;
	set_iommu_table_base(&dev->dev, &iommu_table_dart);
}

static bool iommu_bypass_supported_dart(struct pci_dev *dev, u64 mask)
{
	return dart_is_u4 &&
		dart_device_on_pcie(&dev->dev) &&
		mask >= DMA_BIT_MASK(40);
}

void __init iommu_init_early_dart(struct pci_controller_ops *controller_ops)
{
	struct device_node *dn;

	/* Find the DART in the device-tree */
	dn = of_find_compatible_node(NULL, "dart", "u3-dart");
	if (dn == NULL) {
		dn = of_find_compatible_node(NULL, "dart", "u4-dart");
		if (dn == NULL)
			return;	/* use default direct_dma_ops */
		dart_is_u4 = 1;
	}

	/* Initialize the DART HW */
	if (dart_init(dn) != 0)
		return;

	/*
	 * U4 supports a DART bypass, we use it for 64-bit capable devices to
	 * improve performance. However, that only works for devices connected
	 * to the U4's own PCIe interface, not bridged through hypertransport.
	 * We need the device to support at least 40 bits of addresses.
	 */
	controller_ops->dma_dev_setup = pci_dma_dev_setup_dart;
	controller_ops->dma_bus_setup = pci_dma_bus_setup_dart;
	controller_ops->iommu_bypass_supported = iommu_bypass_supported_dart;

	/* Setup pci_dma ops */
	set_pci_dma_ops(&dma_iommu_ops);
}

#ifdef CONFIG_PM
static void iommu_dart_restore(void)
{
	dart_cache_sync(dart_tablebase, dart_tablesize / sizeof(u32));
	dart_tlb_invalidate_all();
}

static int __init iommu_init_late_dart(void)
{
	if (!dart_tablebase)
		return 0;

	ppc_md.iommu_restore = iommu_dart_restore;

	return 0;
}

late_initcall(iommu_init_late_dart);
#endif /* CONFIG_PM */