// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Christoph Hellwig.
 *
 * DMA operations that map physical memory directly without using an IOMMU.
 */
#include <linux/memblock.h> /* for max_pfn */
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/set_memory.h>
#include <linux/swiotlb.h>

/*
 * Most architectures use ZONE_DMA for the first 16 Megabytes, but
 * some use it for entirely different regions:
 */
#ifndef ARCH_ZONE_DMA_BITS
#define ARCH_ZONE_DMA_BITS 24
#endif

static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
	if (!dev->dma_mask) {
		dev_err_once(dev, "DMA map on device without dma_mask\n");
	} else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
		dev_err_once(dev,
			"overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
			&dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
	}
	WARN_ON_ONCE(1);
}

static inline dma_addr_t phys_to_dma_direct(struct device *dev,
		phys_addr_t phys)
{
	if (force_dma_unencrypted(dev))
		return __phys_to_dma(dev, phys);
	return phys_to_dma(dev, phys);
}

u64 dma_direct_get_required_mask(struct device *dev)
{
	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);

	if (dev->bus_dma_mask && dev->bus_dma_mask < max_dma)
		max_dma = dev->bus_dma_mask;

	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}

static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
		u64 *phys_mask)
{
	if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
		dma_mask = dev->bus_dma_mask;

	if (force_dma_unencrypted(dev))
		*phys_mask = __dma_to_phys(dev, dma_mask);
	else
		*phys_mask = dma_to_phys(dev, dma_mask);

	/*
	 * Optimistically try the zone that the physical address mask falls
	 * into first.  If that returns memory that isn't actually addressable
	 * we will fall back to the next lower zone and try again.
	 *
	 * Note that GFP_DMA32 and GFP_DMA are no-ops without the corresponding
	 * zones.
	 */
	if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
		return GFP_DMA;
	if (*phys_mask <= DMA_BIT_MASK(32))
		return GFP_DMA32;
	return 0;
}
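
/*
 * Check that the allocation, up to and including its last byte, is
 * addressable within the device's coherent DMA mask and, when set, the
 * bus DMA mask.
 */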
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
	return phys_to_dma_direct(dev, phys) + size - 1 <=
		min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
}

struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	struct page *page = NULL;
	u64 phys_mask;

	if (attrs & DMA_ATTR_NO_WARN)
		gfp |= __GFP_NOWARN;

	/* we always manually zero the memory once we are done: */
	gfp &= ~__GFP_ZERO;
	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
			&phys_mask);
again:
	page = dma_alloc_contiguous(dev, size, gfp);
	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
		dma_free_contiguous(dev, page, size);
		page = NULL;

		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
		    phys_mask < DMA_BIT_MASK(64) &&
		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
			gfp |= GFP_DMA32;
			goto again;
		}

		if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
			goto again;
		}
	}

	return page;
}

void *dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	struct page *page;
	void *ret;

	page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
	if (!page)
		return NULL;

	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
		/* remove any dirty cache lines on the kernel alias */
		if (!PageHighMem(page))
			arch_dma_prep_coherent(page, size);
		/* return the page pointer as the opaque cookie */
		return page;
	}

	if (PageHighMem(page)) {
		/*
		 * Depending on the cma= arguments and per-arch setup
		 * dma_alloc_contiguous could return highmem pages.
		 * Without remapping there is no way to return them here,
		 * so log an error and fail.
		 */
		dev_info(dev, "Rejecting highmem page from CMA.\n");
		__dma_direct_free_pages(dev, size, page);
		return NULL;
	}

	ret = page_address(page);
	if (force_dma_unencrypted(dev)) {
		set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
	} else {
		*dma_handle = phys_to_dma(dev, page_to_phys(page));
	}
	memset(ret, 0, size);

	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs)) {
		arch_dma_prep_coherent(page, size);
		ret = uncached_kernel_address(ret);
	}

	return ret;
}

void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
{
	dma_free_contiguous(dev, page, size);
}

void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t dma_addr, unsigned long attrs)
{
	unsigned int page_order = get_order(size);

	if (attrs & DMA_ATTR_NO_KERNEL_MAPPING) {
		/* cpu_addr is a struct page cookie, not a kernel address */
		__dma_direct_free_pages(dev, size, cpu_addr);
		return;
	}

	if (force_dma_unencrypted(dev))
		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);

	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		cpu_addr = cached_kernel_address(cpu_addr);
	__dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
}
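
/*
 * dma_direct_alloc()/dma_direct_free() fall back to arch_dma_alloc() and
 * arch_dma_free() when the allocation needs an uncached mapping but the
 * architecture does not provide an uncached kernel segment.
 */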
void *dma_direct_alloc(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
	return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}

void dma_direct_free(struct device *dev, size_t size,
		void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
	else
		dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}

#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_device(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(dev, paddr, size, dir);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_device);

void dma_direct_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

		if (unlikely(is_swiotlb_buffer(paddr)))
			swiotlb_tbl_sync_single(dev, paddr, sg->length,
					dir, SYNC_FOR_DEVICE);

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_device(dev, paddr, sg->length,
					dir);
	}
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
#endif
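
/*
 * Syncs for the CPU direction: make the buffer visible to the CPU via the
 * arch cache maintenance hooks and, for buffers bounced through swiotlb,
 * copy the device's data back out of the bounce buffer.
 */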
#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
    defined(CONFIG_SWIOTLB)
void dma_direct_sync_single_for_cpu(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (!dev_is_dma_coherent(dev)) {
		arch_sync_dma_for_cpu(dev, paddr, size, dir);
		arch_sync_dma_for_cpu_all(dev);
	}

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);

void dma_direct_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_cpu(dev, paddr, sg->length, dir);

		if (unlikely(is_swiotlb_buffer(paddr)))
			swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
					SYNC_FOR_CPU);
	}

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu_all(dev);
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);

void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t phys = dma_to_phys(dev, addr);

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		dma_direct_sync_single_for_cpu(dev, addr, size, dir);

	if (unlikely(is_swiotlb_buffer(phys)))
		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_page);

void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i)
		dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
				attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif

static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
		size_t size)
{
	return swiotlb_force != SWIOTLB_FORCE &&
		dma_capable(dev, dma_addr, size);
}

dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dma_addr = phys_to_dma(dev, phys);

	if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
	    !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(dev, phys, size, dir);
	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_page);

int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
		enum dma_data_direction dir, unsigned long attrs)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgl, sg, nents, i) {
		sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
				sg->offset, sg->length, dir, attrs);
		if (sg->dma_address == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(sg) = sg->length;
	}

	return nents;

out_unmap:
	dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return 0;
}
EXPORT_SYMBOL(dma_direct_map_sg);
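
/*
 * Map a physical (MMIO) resource.  The physical address is used as the bus
 * address directly; resources are not bounced through swiotlb and get no
 * cache maintenance, so only the addressability check is performed.
 */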
dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t dma_addr = paddr;

	if (unlikely(!dma_direct_possible(dev, dma_addr, size))) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_resource);

/*
 * Because 32-bit DMA masks are so common we expect every architecture to be
 * able to satisfy them - either by not supporting more physical memory, or by
 * providing a ZONE_DMA32.  If neither is the case, the architecture needs to
 * use an IOMMU instead of the direct mapping.
 */
int dma_direct_supported(struct device *dev, u64 mask)
{
	u64 min_mask;

	if (IS_ENABLED(CONFIG_ZONE_DMA))
		min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS);
	else
		min_mask = DMA_BIT_MASK(32);

	min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);

	/*
	 * This check needs to be against the actual bit mask value, so
	 * use __phys_to_dma() here so that the SME encryption mask isn't
	 * part of the check.
	 */
	return mask >= __phys_to_dma(dev, min_mask);
}

size_t dma_direct_max_mapping_size(struct device *dev)
{
	/* If SWIOTLB is active, use its maximum mapping size */
	if (is_swiotlb_active() &&
	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
		return swiotlb_max_mapping_size(dev);
	return SIZE_MAX;
}