/**************************************************************************
 *
 * Copyright (c) 2006-2007 Tungsten Graphics, Inc., Cedar Park, TX., USA
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 **************************************************************************/
/*
 * Authors: Thomas Hellström <thomas-at-tungstengraphics-dot-com>
 */

#include <linux/dma-buf-map.h>
#include <linux/export.h>
#include <linux/highmem.h>
#include <linux/mem_encrypt.h>
#include <xen/xen.h>

#include <drm/drm_cache.h>

/* A small bounce buffer that fits on the stack. */
#define MEMCPY_BOUNCE_SIZE 128

#if defined(CONFIG_X86)
#include <asm/smp.h>

/*
 * clflushopt is an unordered instruction which needs fencing with mfence or
 * sfence to avoid ordering issues. For drm_clflush_page this fencing happens
 * in the caller.
 */
static void
drm_clflush_page(struct page *page)
{
	uint8_t *page_virtual;
	unsigned int i;
	const int size = boot_cpu_data.x86_clflush_size;

	if (unlikely(page == NULL))
		return;

	page_virtual = kmap_atomic(page);
	for (i = 0; i < PAGE_SIZE; i += size)
		clflushopt(page_virtual + i);
	kunmap_atomic(page_virtual);
}

static void drm_cache_flush_clflush(struct page *pages[],
				    unsigned long num_pages)
{
	unsigned long i;

	mb(); /* Full memory barrier before, so that CLFLUSH is ordered. */
	for (i = 0; i < num_pages; i++)
		drm_clflush_page(*pages++);
	mb(); /* Also after CLFLUSH, so that all cache lines are flushed. */
}
#endif

/**
 * drm_clflush_pages - Flush dcache lines of a set of pages.
 * @pages: List of pages to be flushed.
 * @num_pages: Number of pages in the array.
 *
 * Flush every data cache line entry that points to an address belonging
 * to a page in the array.
 */
void
drm_clflush_pages(struct page *pages[], unsigned long num_pages)
{
#if defined(CONFIG_X86)
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		drm_cache_flush_clflush(pages, num_pages);
		return;
	}

	if (wbinvd_on_all_cpus())
		pr_err("Timed out waiting for cache flush\n");

#elif defined(__powerpc__)
	unsigned long i;

	for (i = 0; i < num_pages; i++) {
		struct page *page = pages[i];
		void *page_virtual;

		if (unlikely(page == NULL))
			continue;

		page_virtual = kmap_atomic(page);
		flush_dcache_range((unsigned long)page_virtual,
				   (unsigned long)page_virtual + PAGE_SIZE);
		kunmap_atomic(page_virtual);
	}
#else
	pr_err("Architecture has no drm_cache.c support\n");
	WARN_ON_ONCE(1);
#endif
}
EXPORT_SYMBOL(drm_clflush_pages);
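/*
 * Usage sketch (hypothetical driver code, for illustration only): a driver
 * that has filled shmem-backed pages through their CPU mapping might push
 * the dirty cache lines out before a non-snooping device reads them:
 *
 *	struct page *pages[16];	// assume these were pinned and written
 *
 *	drm_clflush_pages(pages, ARRAY_SIZE(pages));
 *	// the pages are now safe to hand to non-coherent DMA
 */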
/**
 * drm_clflush_sg - Flush dcache lines pointing to a scatter-gather.
 * @st: struct sg_table.
 *
 * Flush every data cache line entry that points to an address in the
 * sg.
 */
void
drm_clflush_sg(struct sg_table *st)
{
#if defined(CONFIG_X86)
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		struct sg_page_iter sg_iter;

		mb(); /* CLFLUSH is ordered only by using memory barriers. */
		for_each_sgtable_page(st, &sg_iter, 0)
			drm_clflush_page(sg_page_iter_page(&sg_iter));
		mb(); /* Make sure that every cache line entry is flushed. */

		return;
	}

	if (wbinvd_on_all_cpus())
		pr_err("Timed out waiting for cache flush\n");
#else
	pr_err("Architecture has no drm_cache.c support\n");
	WARN_ON_ONCE(1);
#endif
}
EXPORT_SYMBOL(drm_clflush_sg);
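/*
 * Usage sketch (hypothetical, for illustration only): when the backing
 * store is already described by a scatter/gather table, the whole object
 * can be flushed in a single call:
 *
 *	struct sg_table *st = ...;	// the object's backing store
 *
 *	drm_clflush_sg(st);
 */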
/**
 * drm_clflush_virt_range - Flush dcache lines of a region
 * @addr: Initial kernel memory address.
 * @length: Region size.
 *
 * Flush every data cache line entry that points to an address in the
 * region requested.
 */
void
drm_clflush_virt_range(void *addr, unsigned long length)
{
#if defined(CONFIG_X86)
	if (static_cpu_has(X86_FEATURE_CLFLUSH)) {
		const int size = boot_cpu_data.x86_clflush_size;
		void *end = addr + length;

		addr = (void *)(((unsigned long)addr) & -size);
		mb(); /* CLFLUSH is only ordered with a full memory barrier. */
		for (; addr < end; addr += size)
			clflushopt(addr);
		clflushopt(end - 1); /* force serialisation */
		mb(); /* Ensure that every data cache line entry is flushed. */
		return;
	}

	if (wbinvd_on_all_cpus())
		pr_err("Timed out waiting for cache flush\n");
#else
	pr_err("Architecture has no drm_cache.c support\n");
	WARN_ON_ONCE(1);
#endif
}
EXPORT_SYMBOL(drm_clflush_virt_range);
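/*
 * Usage sketch (hypothetical, for illustration only): unlike the page-based
 * helpers above, only the written byte range needs flushing, e.g. after
 * patching a few command dwords in a kernel-mapped buffer:
 *
 *	memcpy(vaddr + offset, cmds, cmd_bytes);
 *	drm_clflush_virt_range(vaddr + offset, cmd_bytes);
 */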
bool drm_need_swiotlb(int dma_bits)
{
	struct resource *tmp;
	resource_size_t max_iomem = 0;

	/*
	 * Xen paravirtual hosts require swiotlb regardless of requested dma
	 * transfer size.
	 *
	 * NOTE: Really, what it requires is use of the dma_alloc_coherent
	 * allocator used in ttm_dma_populate() instead of
	 * ttm_populate_and_map_pages(), which bounce buffers so much in
	 * Xen it leads to swiotlb buffer exhaustion.
	 */
	if (xen_pv_domain())
		return true;

	/*
	 * Enforce dma_alloc_coherent when memory encryption is active as well
	 * for the same reasons as for Xen paravirtual hosts.
	 */
	if (mem_encrypt_active())
		return true;

	for (tmp = iomem_resource.child; tmp; tmp = tmp->sibling)
		max_iomem = max(max_iomem, tmp->end);

	return max_iomem > ((u64)1 << dma_bits);
}
EXPORT_SYMBOL(drm_need_swiotlb);
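/*
 * Usage sketch (for illustration; dma_bits is the width of the device's
 * DMA mask): drivers typically call this once at init time to record
 * whether DMA may have to bounce through swiotlb, e.g.:
 *
 *	rdev->need_swiotlb = drm_need_swiotlb(dma_bits);
 *
 * radeon and amdgpu follow this pattern with their per-device dma_bits.
 */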
static void memcpy_fallback(struct dma_buf_map *dst,
			    const struct dma_buf_map *src,
			    unsigned long len)
{
	if (!dst->is_iomem && !src->is_iomem) {
		memcpy(dst->vaddr, src->vaddr, len);
	} else if (!src->is_iomem) {
		dma_buf_map_memcpy_to(dst, src->vaddr, len);
	} else if (!dst->is_iomem) {
		memcpy_fromio(dst->vaddr, src->vaddr_iomem, len);
	} else {
		/*
		 * Bounce size is not performance tuned, but using a
		 * bounce buffer like this is significantly faster than
		 * resorting to ioreadxx() + iowritexx().
		 */
		char bounce[MEMCPY_BOUNCE_SIZE];
		void __iomem *_src = src->vaddr_iomem;
		void __iomem *_dst = dst->vaddr_iomem;

		while (len >= MEMCPY_BOUNCE_SIZE) {
			memcpy_fromio(bounce, _src, MEMCPY_BOUNCE_SIZE);
			memcpy_toio(_dst, bounce, MEMCPY_BOUNCE_SIZE);
			_src += MEMCPY_BOUNCE_SIZE;
			_dst += MEMCPY_BOUNCE_SIZE;
			len -= MEMCPY_BOUNCE_SIZE;
		}
		if (len) {
			/* Copy only the remaining tail, not a full bounce chunk. */
			memcpy_fromio(bounce, _src, len);
			memcpy_toio(_dst, bounce, len);
		}
	}
}

#ifdef CONFIG_X86

static DEFINE_STATIC_KEY_FALSE(has_movntdqa);

static void __memcpy_ntdqa(void *dst, const void *src, unsigned long len)
{
	kernel_fpu_begin();

	while (len >= 4) {
		asm("movntdqa (%0), %%xmm0\n"
		    "movntdqa 16(%0), %%xmm1\n"
		    "movntdqa 32(%0), %%xmm2\n"
		    "movntdqa 48(%0), %%xmm3\n"
		    "movaps %%xmm0, (%1)\n"
		    "movaps %%xmm1, 16(%1)\n"
		    "movaps %%xmm2, 32(%1)\n"
		    "movaps %%xmm3, 48(%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 64;
		dst += 64;
		len -= 4;
	}
	while (len--) {
		asm("movntdqa (%0), %%xmm0\n"
		    "movaps %%xmm0, (%1)\n"
		    :: "r" (src), "r" (dst) : "memory");
		src += 16;
		dst += 16;
	}

	kernel_fpu_end();
}

/*
 * __drm_memcpy_from_wc copies @len bytes from @src to @dst using
 * non-temporal instructions where available. If @src, @dst or @len is
 * not aligned to 16 bytes, it falls back to an ordinary memcpy.
 */
static void __drm_memcpy_from_wc(void *dst, const void *src, unsigned long len)
{
	if (unlikely(((unsigned long)dst | (unsigned long)src | len) & 15))
		memcpy(dst, src, len);
	else if (likely(len))
		__memcpy_ntdqa(dst, src, len >> 4);
}

/**
 * drm_memcpy_from_wc - Perform the fastest available memcpy from a source
 * that may be WC.
 * @dst: The destination pointer
 * @src: The source pointer
 * @len: The size of the area to transfer in bytes
 *
 * Tries an arch optimized memcpy for prefetching reading out of a WC region,
 * and if no such beast is available, falls back to a normal memcpy.
 */
void drm_memcpy_from_wc(struct dma_buf_map *dst,
			const struct dma_buf_map *src,
			unsigned long len)
{
	if (WARN_ON(in_interrupt())) {
		memcpy_fallback(dst, src, len);
		return;
	}

	if (static_branch_likely(&has_movntdqa)) {
		__drm_memcpy_from_wc(dst->is_iomem ?
				     (void __force *)dst->vaddr_iomem :
				     dst->vaddr,
				     src->is_iomem ?
				     (void const __force *)src->vaddr_iomem :
				     src->vaddr,
				     len);
		return;
	}

	memcpy_fallback(dst, src, len);
}
EXPORT_SYMBOL(drm_memcpy_from_wc);
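/*
 * Usage sketch (hypothetical, for illustration only): both sides are
 * described by struct dma_buf_map, so the helper can pick the right plain
 * or __iomem accessors, e.g. reading back a WC-mapped VRAM buffer into a
 * cached shadow copy:
 *
 *	struct dma_buf_map src, dst;
 *
 *	dma_buf_map_set_vaddr_iomem(&src, vram_ptr);	// WC __iomem source
 *	dma_buf_map_set_vaddr(&dst, shadow);		// cached destination
 *	drm_memcpy_from_wc(&dst, &src, size);
 */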
/*
 * drm_memcpy_init_early - One time initialization of the WC memcpy code
 */
void drm_memcpy_init_early(void)
{
	/*
	 * Some hypervisors (e.g. KVM) don't support emulation of VEX-prefixed
	 * instructions, so don't enable movntdqa in hypervisor guests.
	 */
	if (static_cpu_has(X86_FEATURE_XMM4_1) &&
	    !boot_cpu_has(X86_FEATURE_HYPERVISOR))
		static_branch_enable(&has_movntdqa);
}
#else
void drm_memcpy_from_wc(struct dma_buf_map *dst,
			const struct dma_buf_map *src,
			unsigned long len)
{
	WARN_ON(in_interrupt());

	memcpy_fallback(dst, src, len);
}
EXPORT_SYMBOL(drm_memcpy_from_wc);

void drm_memcpy_init_early(void)
{
}
#endif /* CONFIG_X86 */
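/*
 * Usage note (sketch): drm_memcpy_init_early() is meant to run exactly once,
 * before the first drm_memcpy_from_wc() call, e.g. from the DRM core's
 * module init, roughly:
 *
 *	static int __init drm_core_init(void)
 *	{
 *		drm_memcpy_init_early();
 *		...
 *	}
 */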