// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright (C) 2018 Christoph Hellwig.
 *
 * DMA operations that map physical memory directly without using an IOMMU.
 */
#include <linux/memblock.h> /* for max_pfn */
#include <linux/export.h>
#include <linux/mm.h>
#include <linux/dma-direct.h>
#include <linux/scatterlist.h>
#include <linux/dma-contiguous.h>
#include <linux/dma-noncoherent.h>
#include <linux/pfn.h>
#include <linux/set_memory.h>
#include <linux/swiotlb.h>

/*
 * Most architectures use ZONE_DMA for the first 16 Megabytes, but
 * some use it for entirely different regions:
 */
#ifndef ARCH_ZONE_DMA_BITS
#define ARCH_ZONE_DMA_BITS 24
#endif

/*
 * One-time diagnostic for a mapping that cannot be expressed within the
 * device's DMA addressing limits.  A missing dma_mask is always reported;
 * the overflow message is only printed for devices claiming >= 32-bit
 * addressing or ones with an explicit bus limit.
 */
static void report_addr(struct device *dev, dma_addr_t dma_addr, size_t size)
{
	if (!dev->dma_mask) {
		dev_err_once(dev, "DMA map on device without dma_mask\n");
	} else if (*dev->dma_mask >= DMA_BIT_MASK(32) || dev->bus_dma_mask) {
		dev_err_once(dev,
			"overflow %pad+%zu of DMA mask %llx bus mask %llx\n",
			&dma_addr, size, *dev->dma_mask, dev->bus_dma_mask);
	}
	WARN_ON_ONCE(1);
}

/*
 * Translate a CPU physical address to a device-visible DMA address.  For
 * devices that require unencrypted DMA use the raw __phys_to_dma() so the
 * memory-encryption (e.g. SME) mask is not included in the address.
 */
static inline dma_addr_t phys_to_dma_direct(struct device *dev,
		phys_addr_t phys)
{
	if (force_dma_unencrypted(dev))
		return __phys_to_dma(dev, phys);
	return phys_to_dma(dev, phys);
}

/*
 * Smallest power-of-two-minus-one mask that covers the DMA address of the
 * highest page present in the system (max_pfn - 1).
 */
u64 dma_direct_get_required_mask(struct device *dev)
{
	u64 max_dma = phys_to_dma_direct(dev, (max_pfn - 1) << PAGE_SHIFT);

	return (1ULL << (fls64(max_dma) - 1)) * 2 - 1;
}

/*
 * Pick the GFP zone flag best suited for an allocation that must be
 * reachable under @dma_mask (further capped by any bus mask), and report
 * the corresponding physical address limit through @phys_mask.
 */
static gfp_t __dma_direct_optimal_gfp_mask(struct device *dev, u64 dma_mask,
		u64 *phys_mask)
{
	if (dev->bus_dma_mask && dev->bus_dma_mask < dma_mask)
		dma_mask = dev->bus_dma_mask;

	if (force_dma_unencrypted(dev))
		*phys_mask = __dma_to_phys(dev, dma_mask);
	else
		*phys_mask = dma_to_phys(dev, dma_mask);

	/*
	 * Optimistically try the zone that the physical address mask falls
	 * into first.  If that returns memory that isn't actually addressable
	 * we will fallback to the next lower zone and try again.
	 *
	 * Note that GFP_DMA32 and GFP_DMA are no ops without the corresponding
	 * zones.
	 */
	if (*phys_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
		return GFP_DMA;
	if (*phys_mask <= DMA_BIT_MASK(32))
		return GFP_DMA32;
	return 0;
}

/*
 * True if [phys, phys + size) is reachable under both the coherent DMA
 * mask and (when set) the bus DMA mask.
 */
static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
{
	return phys_to_dma_direct(dev, phys) + size - 1 <=
			min_not_zero(dev->coherent_dma_mask, dev->bus_dma_mask);
}

/*
 * Allocate pages suitable for coherent DMA.  Starts in the optimal zone
 * for the coherent mask and, if the returned memory turns out not to be
 * addressable, retries in progressively more restrictive zones
 * (ZONE_DMA32, then ZONE_DMA).  Returns NULL on failure.  The pages are
 * NOT zeroed here (__GFP_ZERO is stripped; callers zero the mapping).
 */
struct page *__dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	struct page *page = NULL;
	u64 phys_mask;

	if (attrs & DMA_ATTR_NO_WARN)
		gfp |= __GFP_NOWARN;

	/* we always manually zero the memory once we are done: */
	gfp &= ~__GFP_ZERO;
	gfp |= __dma_direct_optimal_gfp_mask(dev, dev->coherent_dma_mask,
			&phys_mask);
again:
	page = dma_alloc_contiguous(dev, size, gfp);
	if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) {
		dma_free_contiguous(dev, page, size);
		page = NULL;

		/* first fallback: retry in ZONE_DMA32 if not tried yet */
		if (IS_ENABLED(CONFIG_ZONE_DMA32) &&
		    phys_mask < DMA_BIT_MASK(64) &&
		    !(gfp & (GFP_DMA32 | GFP_DMA))) {
			gfp |= GFP_DMA32;
			goto again;
		}

		/* last resort: ZONE_DMA */
		if (IS_ENABLED(CONFIG_ZONE_DMA) && !(gfp & GFP_DMA)) {
			gfp = (gfp & ~GFP_DMA32) | GFP_DMA;
			goto again;
		}
	}

	return page;
}

/*
 * Allocate a coherent buffer and set *dma_handle to its device address.
 * Normally returns the kernel virtual address of the (zeroed) buffer;
 * with DMA_ATTR_NO_KERNEL_MAPPING it returns the struct page pointer as
 * an opaque cookie instead.  Returns NULL on failure.
 */
void *dma_direct_alloc_pages(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	struct page *page;
	void *ret;

	page = __dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
	if (!page)
		return NULL;

	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
	    !force_dma_unencrypted(dev)) {
		/* remove any dirty cache lines on the kernel alias */
		if (!PageHighMem(page))
			arch_dma_prep_coherent(page, size);
		*dma_handle = phys_to_dma(dev, page_to_phys(page));
		/* return the page pointer as the opaque cookie */
		return page;
	}

	if (PageHighMem(page)) {
		/*
		 * Depending on the cma= arguments and per-arch setup
		 * dma_alloc_contiguous could return highmem pages.
		 * Without remapping there is no way to return them here,
		 * so log an error and fail.
		 */
		dev_info(dev, "Rejecting highmem page from CMA.\n");
		__dma_direct_free_pages(dev, size, page);
		return NULL;
	}

	ret = page_address(page);
	if (force_dma_unencrypted(dev)) {
		/* clear the encryption bit from the kernel mapping and the
		 * returned handle so the device sees plaintext */
		set_memory_decrypted((unsigned long)ret, 1 << get_order(size));
		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
	} else {
		*dma_handle = phys_to_dma(dev, page_to_phys(page));
	}
	memset(ret, 0, size);

	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs)) {
		/* writeback caches, then hand out the uncached alias */
		arch_dma_prep_coherent(page, size);
		ret = uncached_kernel_address(ret);
	}

	return ret;
}

/* Release pages obtained from __dma_direct_alloc_pages(). */
void __dma_direct_free_pages(struct device *dev, size_t size, struct page *page)
{
	dma_free_contiguous(dev, page, size);
}

/*
 * Free a buffer from dma_direct_alloc_pages(), undoing any decryption
 * and uncached-segment aliasing that was applied at allocation time.
 */
void dma_direct_free_pages(struct device *dev, size_t size, void *cpu_addr,
		dma_addr_t dma_addr, unsigned long attrs)
{
	unsigned int page_order = get_order(size);

	if ((attrs & DMA_ATTR_NO_KERNEL_MAPPING) &&
	    !force_dma_unencrypted(dev)) {
		/* cpu_addr is a struct page cookie, not a kernel address */
		__dma_direct_free_pages(dev, size, cpu_addr);
		return;
	}

	if (force_dma_unencrypted(dev))
		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);

	if (IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		cpu_addr = cached_kernel_address(cpu_addr);
	__dma_direct_free_pages(dev, size, virt_to_page(cpu_addr));
}

/*
 * Architectures that cannot provide an uncached segment must implement
 * their own uncached mapping via arch_dma_alloc(); everything else goes
 * through the generic path.
 */
void *dma_direct_alloc(struct device *dev, size_t size,
		dma_addr_t *dma_handle, gfp_t gfp, unsigned long attrs)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		return arch_dma_alloc(dev, size, dma_handle, gfp, attrs);
	return dma_direct_alloc_pages(dev, size, dma_handle, gfp, attrs);
}

/* Counterpart to dma_direct_alloc(); must mirror its dispatch choice. */
void dma_direct_free(struct device *dev, size_t size,
		void *cpu_addr, dma_addr_t dma_addr, unsigned long attrs)
{
	if (!IS_ENABLED(CONFIG_ARCH_HAS_UNCACHED_SEGMENT) &&
	    dma_alloc_need_uncached(dev, attrs))
		arch_dma_free(dev, size, cpu_addr, dma_addr, attrs);
	else
		dma_direct_free_pages(dev, size, cpu_addr, dma_addr, attrs);
}

#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE) || \
    defined(CONFIG_SWIOTLB)
/*
 * Make a mapped region visible to the device: sync any swiotlb bounce
 * buffer for the device first, then perform architecture cache
 * maintenance on non-coherent platforms.
 */
void dma_direct_sync_single_for_device(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_DEVICE);

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_device(dev, paddr, size, dir);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_device);

/* Scatterlist variant of dma_direct_sync_single_for_device(). */
void dma_direct_sync_sg_for_device(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

		if (unlikely(is_swiotlb_buffer(paddr)))
			swiotlb_tbl_sync_single(dev, paddr, sg->length,
					dir, SYNC_FOR_DEVICE);

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_device(dev, paddr, sg->length,
					dir);
	}
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_device);
#endif

#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) || \
    defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) || \
    defined(CONFIG_SWIOTLB)
/*
 * Make a mapped region visible to the CPU: architecture cache
 * maintenance first on non-coherent platforms, then sync any swiotlb
 * bounce buffer back for the CPU (note the order is the reverse of the
 * for_device direction).
 */
void dma_direct_sync_single_for_cpu(struct device *dev,
		dma_addr_t addr, size_t size, enum dma_data_direction dir)
{
	phys_addr_t paddr = dma_to_phys(dev, addr);

	if (!dev_is_dma_coherent(dev)) {
		arch_sync_dma_for_cpu(dev, paddr, size, dir);
		arch_sync_dma_for_cpu_all(dev);
	}

	if (unlikely(is_swiotlb_buffer(paddr)))
		swiotlb_tbl_sync_single(dev, paddr, size, dir, SYNC_FOR_CPU);
}
EXPORT_SYMBOL(dma_direct_sync_single_for_cpu);

/*
 * Scatterlist variant of dma_direct_sync_single_for_cpu().  The
 * _for_cpu_all hook is issued once, after all entries were synced.
 */
void dma_direct_sync_sg_for_cpu(struct device *dev,
		struct scatterlist *sgl, int nents, enum dma_data_direction dir)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i) {
		phys_addr_t paddr = dma_to_phys(dev, sg_dma_address(sg));

		if (!dev_is_dma_coherent(dev))
			arch_sync_dma_for_cpu(dev, paddr, sg->length, dir);

		if (unlikely(is_swiotlb_buffer(paddr)))
			swiotlb_tbl_sync_single(dev, paddr, sg->length, dir,
					SYNC_FOR_CPU);
	}

	if (!dev_is_dma_coherent(dev))
		arch_sync_dma_for_cpu_all(dev);
}
EXPORT_SYMBOL(dma_direct_sync_sg_for_cpu);

/*
 * Tear down a streaming mapping: sync for the CPU (unless the caller
 * opted out with DMA_ATTR_SKIP_CPU_SYNC), then release any swiotlb
 * bounce buffer backing it.
 */
void dma_direct_unmap_page(struct device *dev, dma_addr_t addr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	phys_addr_t phys = dma_to_phys(dev, addr);

	if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		dma_direct_sync_single_for_cpu(dev, addr, size, dir);

	if (unlikely(is_swiotlb_buffer(phys)))
		swiotlb_tbl_unmap_single(dev, phys, size, dir, attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_page);

/* Scatterlist variant of dma_direct_unmap_page(). */
void dma_direct_unmap_sg(struct device *dev, struct scatterlist *sgl,
		int nents, enum dma_data_direction dir, unsigned long attrs)
{
	struct scatterlist *sg;
	int i;

	for_each_sg(sgl, sg, nents, i)
		dma_direct_unmap_page(dev, sg->dma_address, sg_dma_len(sg), dir,
			attrs);
}
EXPORT_SYMBOL(dma_direct_unmap_sg);
#endif

/*
 * True if the region can be mapped directly: swiotlb bouncing is not
 * being forced and the device can address the whole range.
 */
static inline bool dma_direct_possible(struct device *dev, dma_addr_t dma_addr,
		size_t size)
{
	return swiotlb_force != SWIOTLB_FORCE &&
		dma_capable(dev, dma_addr, size);
}

/*
 * Map a page for streaming DMA.  Falls back to a swiotlb bounce buffer
 * when the page is not directly addressable; returns DMA_MAPPING_ERROR
 * if that also fails.
 */
dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
		unsigned long offset, size_t size, enum dma_data_direction dir,
		unsigned long attrs)
{
	phys_addr_t phys = page_to_phys(page) + offset;
	dma_addr_t dma_addr = phys_to_dma(dev, phys);

	if (unlikely(!dma_direct_possible(dev, dma_addr, size)) &&
	    !swiotlb_map(dev, &phys, &dma_addr, size, dir, attrs)) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	if (!dev_is_dma_coherent(dev) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC))
		arch_sync_dma_for_device(dev, phys, size, dir);
	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_page);

/*
 * Map a scatterlist entry by entry.  On failure every entry mapped so
 * far is unwound (without an extra CPU sync) and 0 is returned.
 */
int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents,
		enum dma_data_direction dir, unsigned long attrs)
{
	int i;
	struct scatterlist *sg;

	for_each_sg(sgl, sg, nents, i) {
		sg->dma_address = dma_direct_map_page(dev, sg_page(sg),
				sg->offset, sg->length, dir, attrs);
		if (sg->dma_address == DMA_MAPPING_ERROR)
			goto out_unmap;
		sg_dma_len(sg) = sg->length;
	}

	return nents;

out_unmap:
	dma_direct_unmap_sg(dev, sgl, i, dir, attrs | DMA_ATTR_SKIP_CPU_SYNC);
	return 0;
}
EXPORT_SYMBOL(dma_direct_map_sg);

/*
 * Map a physical resource (e.g. MMIO) for DMA.  Note there is no swiotlb
 * fallback here: if the device cannot address it the mapping simply
 * fails with DMA_MAPPING_ERROR.
 */
dma_addr_t dma_direct_map_resource(struct device *dev, phys_addr_t paddr,
		size_t size, enum dma_data_direction dir, unsigned long attrs)
{
	dma_addr_t dma_addr = paddr;

	if (unlikely(!dma_direct_possible(dev, dma_addr, size))) {
		report_addr(dev, dma_addr, size);
		return DMA_MAPPING_ERROR;
	}

	return dma_addr;
}
EXPORT_SYMBOL(dma_direct_map_resource);

/*
 * Because 32-bit DMA masks are so common we expect every architecture to be
 * able to satisfy them - either by not supporting more physical memory, or by
 * providing a ZONE_DMA32.  If neither is the case, the architecture needs to
 * use an IOMMU instead of the direct mapping.
 */
int dma_direct_supported(struct device *dev, u64 mask)
{
	u64 min_mask;

	if (IS_ENABLED(CONFIG_ZONE_DMA))
		min_mask = DMA_BIT_MASK(ARCH_ZONE_DMA_BITS);
	else
		min_mask = DMA_BIT_MASK(32);

	/* never require more than the highest physical address present */
	min_mask = min_t(u64, min_mask, (max_pfn - 1) << PAGE_SHIFT);

	/*
	 * This check needs to be against the actual bit mask value, so
	 * use __phys_to_dma() here so that the SME encryption mask isn't
	 * part of the check.
	 */
	return mask >= __phys_to_dma(dev, min_mask);
}

/*
 * Largest size a single streaming mapping may have: bounded by swiotlb
 * when bouncing is (or may be) in effect, otherwise unlimited.
 */
size_t dma_direct_max_mapping_size(struct device *dev)
{
	/* If SWIOTLB is active, use its maximum mapping size */
	if (is_swiotlb_active() &&
	    (dma_addressing_limited(dev) || swiotlb_force == SWIOTLB_FORCE))
		return swiotlb_max_mapping_size(dev);
	return SIZE_MAX;
}