1 /* 2 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com) 3 * 4 * This program is free software; you can redistribute it and/or modify 5 * it under the terms of the GNU General Public License version 2 as 6 * published by the Free Software Foundation. 7 */ 8 9 /* 10 * DMA Coherent API Notes 11 * 12 * I/O is inherently non-coherent on ARC. So a coherent DMA buffer is 13 * implemented by accessing it using a kernel virtual address, with 14 * Cache bit off in the TLB entry. 15 * 16 * The default DMA address == Phy address which is 0x8000_0000 based. 17 */ 18 19 #include <linux/dma-noncoherent.h> 20 #include <asm/cache.h> 21 #include <asm/cacheflush.h> 22 23 void *arch_dma_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, 24 gfp_t gfp, unsigned long attrs) 25 { 26 unsigned long order = get_order(size); 27 struct page *page; 28 phys_addr_t paddr; 29 void *kvaddr; 30 int need_coh = 1, need_kvaddr = 0; 31 32 page = alloc_pages(gfp, order); 33 if (!page) 34 return NULL; 35 36 /* 37 * IOC relies on all data (even coherent DMA data) being in cache 38 * Thus allocate normal cached memory 39 * 40 * The gains with IOC are two pronged: 41 * -For streaming data, elides need for cache maintenance, saving 42 * cycles in flush code, and bus bandwidth as all the lines of a 43 * buffer need to be flushed out to memory 44 * -For coherent data, Read/Write to buffers terminate early in cache 45 * (vs. always going to memory - thus are faster) 46 */ 47 if ((is_isa_arcv2() && ioc_enable) || 48 (attrs & DMA_ATTR_NON_CONSISTENT)) 49 need_coh = 0; 50 51 /* 52 * - A coherent buffer needs MMU mapping to enforce non-cachability 53 * - A highmem page needs a virtual handle (hence MMU mapping) 54 * independent of cachability 55 */ 56 if (PageHighMem(page) || need_coh) 57 need_kvaddr = 1; 58 59 /* This is linear addr (0x8000_0000 based) */ 60 paddr = page_to_phys(page); 61 62 *dma_handle = paddr; 63 64 /* This is kernel Virtual address (0x7000_0000 based) */ 65 if (need_kvaddr) { 66 kvaddr = ioremap_nocache(paddr, size); 67 if (kvaddr == NULL) { 68 __free_pages(page, order); 69 return NULL; 70 } 71 } else { 72 kvaddr = (void *)(u32)paddr; 73 } 74 75 /* 76 * Evict any existing L1 and/or L2 lines for the backing page 77 * in case it was used earlier as a normal "cached" page. 78 * Yeah this bit us - STAR 9000898266 79 * 80 * Although core does call flush_cache_vmap(), it gets kvaddr hence 81 * can't be used to efficiently flush L1 and/or L2 which need paddr 82 * Currently flush_cache_vmap nukes the L1 cache completely which 83 * will be optimized as a separate commit 84 */ 85 if (need_coh) 86 dma_cache_wback_inv(paddr, size); 87 88 return kvaddr; 89 } 90 91 void arch_dma_free(struct device *dev, size_t size, void *vaddr, 92 dma_addr_t dma_handle, unsigned long attrs) 93 { 94 phys_addr_t paddr = dma_handle; 95 struct page *page = virt_to_page(paddr); 96 int is_non_coh = 1; 97 98 is_non_coh = (attrs & DMA_ATTR_NON_CONSISTENT) || 99 (is_isa_arcv2() && ioc_enable); 100 101 if (PageHighMem(page) || !is_non_coh) 102 iounmap((void __force __iomem *)vaddr); 103 104 __free_pages(page, get_order(size)); 105 } 106 107 int arch_dma_mmap(struct device *dev, struct vm_area_struct *vma, 108 void *cpu_addr, dma_addr_t dma_addr, size_t size, 109 unsigned long attrs) 110 { 111 unsigned long user_count = vma_pages(vma); 112 unsigned long count = PAGE_ALIGN(size) >> PAGE_SHIFT; 113 unsigned long pfn = __phys_to_pfn(dma_addr); 114 unsigned long off = vma->vm_pgoff; 115 int ret = -ENXIO; 116 117 vma->vm_page_prot = pgprot_noncached(vma->vm_page_prot); 118 119 if (dma_mmap_from_dev_coherent(dev, vma, cpu_addr, size, &ret)) 120 return ret; 121 122 if (off < count && user_count <= (count - off)) { 123 ret = remap_pfn_range(vma, vma->vm_start, 124 pfn + off, 125 user_count << PAGE_SHIFT, 126 vma->vm_page_prot); 127 } 128 129 return ret; 130 } 131 132 /* 133 * Cache operations depending on function and direction argument, inspired by 134 * https://lkml.org/lkml/2018/5/18/979 135 * "dma_sync_*_for_cpu and direction=TO_DEVICE (was Re: [PATCH 02/20] 136 * dma-mapping: provide a generic dma-noncoherent implementation)" 137 * 138 * | map == for_device | unmap == for_cpu 139 * |---------------------------------------------------------------- 140 * TO_DEV | writeback writeback | none none 141 * FROM_DEV | invalidate invalidate | invalidate* invalidate* 142 * BIDIR | writeback+inv writeback+inv | invalidate invalidate 143 * 144 * [*] needed for CPU speculative prefetches 145 * 146 * NOTE: we don't check the validity of direction argument as it is done in 147 * upper layer functions (in include/linux/dma-mapping.h) 148 */ 149 150 void arch_sync_dma_for_device(struct device *dev, phys_addr_t paddr, 151 size_t size, enum dma_data_direction dir) 152 { 153 switch (dir) { 154 case DMA_TO_DEVICE: 155 dma_cache_wback(paddr, size); 156 break; 157 158 case DMA_FROM_DEVICE: 159 dma_cache_inv(paddr, size); 160 break; 161 162 case DMA_BIDIRECTIONAL: 163 dma_cache_wback_inv(paddr, size); 164 break; 165 166 default: 167 break; 168 } 169 } 170 171 void arch_sync_dma_for_cpu(struct device *dev, phys_addr_t paddr, 172 size_t size, enum dma_data_direction dir) 173 { 174 switch (dir) { 175 case DMA_TO_DEVICE: 176 break; 177 178 /* FROM_DEVICE invalidate needed if speculative CPU prefetch only */ 179 case DMA_FROM_DEVICE: 180 case DMA_BIDIRECTIONAL: 181 dma_cache_inv(paddr, size); 182 break; 183 184 default: 185 break; 186 } 187 } 188