1*111d746bSScott Branden // SPDX-License-Identifier: GPL-2.0
2*111d746bSScott Branden /*
3*111d746bSScott Branden * Copyright 2018-2020 Broadcom.
4*111d746bSScott Branden */
5*111d746bSScott Branden #include <linux/dma-mapping.h>
6*111d746bSScott Branden #include <linux/mm.h>
7*111d746bSScott Branden #include <linux/pagemap.h>
8*111d746bSScott Branden #include <linux/pgtable.h>
9*111d746bSScott Branden #include <linux/vmalloc.h>
10*111d746bSScott Branden
11*111d746bSScott Branden #include <asm/page.h>
12*111d746bSScott Branden #include <asm/unaligned.h>
13*111d746bSScott Branden
14*111d746bSScott Branden #include <uapi/linux/misc/bcm_vk.h>
15*111d746bSScott Branden
16*111d746bSScott Branden #include "bcm_vk.h"
17*111d746bSScott Branden #include "bcm_vk_msg.h"
18*111d746bSScott Branden #include "bcm_vk_sg.h"
19*111d746bSScott Branden
/*
 * The Valkyrie hardware cannot move more than 16M in a single transfer,
 * so no SGL chunk is allowed to grow beyond 16M.
 */
#define BCM_VK_MAX_SGL_CHUNK SZ_16M

/* File-local helpers; both are defined further down in this file. */
static int bcm_vk_dma_alloc(struct device *dev, struct bcm_vk_dma *dma,
			    int dir, struct _vk_data *vkdata);
static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma);

/* Uncomment to dump SGLIST */
/* #define BCM_VK_DUMP_SGLIST */
34*111d746bSScott Branden
bcm_vk_dma_alloc(struct device * dev,struct bcm_vk_dma * dma,int direction,struct _vk_data * vkdata)35*111d746bSScott Branden static int bcm_vk_dma_alloc(struct device *dev,
36*111d746bSScott Branden struct bcm_vk_dma *dma,
37*111d746bSScott Branden int direction,
38*111d746bSScott Branden struct _vk_data *vkdata)
39*111d746bSScott Branden {
40*111d746bSScott Branden dma_addr_t addr, sg_addr;
41*111d746bSScott Branden int err;
42*111d746bSScott Branden int i;
43*111d746bSScott Branden int offset;
44*111d746bSScott Branden u32 size;
45*111d746bSScott Branden u32 remaining_size;
46*111d746bSScott Branden u32 transfer_size;
47*111d746bSScott Branden u64 data;
48*111d746bSScott Branden unsigned long first, last;
49*111d746bSScott Branden struct _vk_data *sgdata;
50*111d746bSScott Branden
51*111d746bSScott Branden /* Get 64-bit user address */
52*111d746bSScott Branden data = get_unaligned(&vkdata->address);
53*111d746bSScott Branden
54*111d746bSScott Branden /* offset into first page */
55*111d746bSScott Branden offset = offset_in_page(data);
56*111d746bSScott Branden
57*111d746bSScott Branden /* Calculate number of pages */
58*111d746bSScott Branden first = (data & PAGE_MASK) >> PAGE_SHIFT;
59*111d746bSScott Branden last = ((data + vkdata->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
60*111d746bSScott Branden dma->nr_pages = last - first + 1;
61*111d746bSScott Branden
62*111d746bSScott Branden /* Allocate DMA pages */
63*111d746bSScott Branden dma->pages = kmalloc_array(dma->nr_pages,
64*111d746bSScott Branden sizeof(struct page *),
65*111d746bSScott Branden GFP_KERNEL);
66*111d746bSScott Branden if (!dma->pages)
67*111d746bSScott Branden return -ENOMEM;
68*111d746bSScott Branden
69*111d746bSScott Branden dev_dbg(dev, "Alloc DMA Pages [0x%llx+0x%x => %d pages]\n",
70*111d746bSScott Branden data, vkdata->size, dma->nr_pages);
71*111d746bSScott Branden
72*111d746bSScott Branden dma->direction = direction;
73*111d746bSScott Branden
74*111d746bSScott Branden /* Get user pages into memory */
75*111d746bSScott Branden err = get_user_pages_fast(data & PAGE_MASK,
76*111d746bSScott Branden dma->nr_pages,
77*111d746bSScott Branden direction == DMA_FROM_DEVICE,
78*111d746bSScott Branden dma->pages);
79*111d746bSScott Branden if (err != dma->nr_pages) {
80*111d746bSScott Branden dma->nr_pages = (err >= 0) ? err : 0;
81*111d746bSScott Branden dev_err(dev, "get_user_pages_fast, err=%d [%d]\n",
82*111d746bSScott Branden err, dma->nr_pages);
83*111d746bSScott Branden return err < 0 ? err : -EINVAL;
84*111d746bSScott Branden }
85*111d746bSScott Branden
86*111d746bSScott Branden /* Max size of sg list is 1 per mapped page + fields at start */
87*111d746bSScott Branden dma->sglen = (dma->nr_pages * sizeof(*sgdata)) +
88*111d746bSScott Branden (sizeof(u32) * SGLIST_VKDATA_START);
89*111d746bSScott Branden
90*111d746bSScott Branden /* Allocate sglist */
91*111d746bSScott Branden dma->sglist = dma_alloc_coherent(dev,
92*111d746bSScott Branden dma->sglen,
93*111d746bSScott Branden &dma->handle,
94*111d746bSScott Branden GFP_KERNEL);
95*111d746bSScott Branden if (!dma->sglist)
96*111d746bSScott Branden return -ENOMEM;
97*111d746bSScott Branden
98*111d746bSScott Branden dma->sglist[SGLIST_NUM_SG] = 0;
99*111d746bSScott Branden dma->sglist[SGLIST_TOTALSIZE] = vkdata->size;
100*111d746bSScott Branden remaining_size = vkdata->size;
101*111d746bSScott Branden sgdata = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START];
102*111d746bSScott Branden
103*111d746bSScott Branden /* Map all pages into DMA */
104*111d746bSScott Branden size = min_t(size_t, PAGE_SIZE - offset, remaining_size);
105*111d746bSScott Branden remaining_size -= size;
106*111d746bSScott Branden sg_addr = dma_map_page(dev,
107*111d746bSScott Branden dma->pages[0],
108*111d746bSScott Branden offset,
109*111d746bSScott Branden size,
110*111d746bSScott Branden dma->direction);
111*111d746bSScott Branden transfer_size = size;
112*111d746bSScott Branden if (unlikely(dma_mapping_error(dev, sg_addr))) {
113*111d746bSScott Branden __free_page(dma->pages[0]);
114*111d746bSScott Branden return -EIO;
115*111d746bSScott Branden }
116*111d746bSScott Branden
117*111d746bSScott Branden for (i = 1; i < dma->nr_pages; i++) {
118*111d746bSScott Branden size = min_t(size_t, PAGE_SIZE, remaining_size);
119*111d746bSScott Branden remaining_size -= size;
120*111d746bSScott Branden addr = dma_map_page(dev,
121*111d746bSScott Branden dma->pages[i],
122*111d746bSScott Branden 0,
123*111d746bSScott Branden size,
124*111d746bSScott Branden dma->direction);
125*111d746bSScott Branden if (unlikely(dma_mapping_error(dev, addr))) {
126*111d746bSScott Branden __free_page(dma->pages[i]);
127*111d746bSScott Branden return -EIO;
128*111d746bSScott Branden }
129*111d746bSScott Branden
130*111d746bSScott Branden /*
131*111d746bSScott Branden * Compress SG list entry when pages are contiguous
132*111d746bSScott Branden * and transfer size less or equal to BCM_VK_MAX_SGL_CHUNK
133*111d746bSScott Branden */
134*111d746bSScott Branden if ((addr == (sg_addr + transfer_size)) &&
135*111d746bSScott Branden ((transfer_size + size) <= BCM_VK_MAX_SGL_CHUNK)) {
136*111d746bSScott Branden /* pages are contiguous, add to same sg entry */
137*111d746bSScott Branden transfer_size += size;
138*111d746bSScott Branden } else {
139*111d746bSScott Branden /* pages are not contiguous, write sg entry */
140*111d746bSScott Branden sgdata->size = transfer_size;
141*111d746bSScott Branden put_unaligned(sg_addr, (u64 *)&sgdata->address);
142*111d746bSScott Branden dma->sglist[SGLIST_NUM_SG]++;
143*111d746bSScott Branden
144*111d746bSScott Branden /* start new sg entry */
145*111d746bSScott Branden sgdata++;
146*111d746bSScott Branden sg_addr = addr;
147*111d746bSScott Branden transfer_size = size;
148*111d746bSScott Branden }
149*111d746bSScott Branden }
150*111d746bSScott Branden /* Write last sg list entry */
151*111d746bSScott Branden sgdata->size = transfer_size;
152*111d746bSScott Branden put_unaligned(sg_addr, (u64 *)&sgdata->address);
153*111d746bSScott Branden dma->sglist[SGLIST_NUM_SG]++;
154*111d746bSScott Branden
155*111d746bSScott Branden /* Update pointers and size field to point to sglist */
156*111d746bSScott Branden put_unaligned((u64)dma->handle, &vkdata->address);
157*111d746bSScott Branden vkdata->size = (dma->sglist[SGLIST_NUM_SG] * sizeof(*sgdata)) +
158*111d746bSScott Branden (sizeof(u32) * SGLIST_VKDATA_START);
159*111d746bSScott Branden
160*111d746bSScott Branden #ifdef BCM_VK_DUMP_SGLIST
161*111d746bSScott Branden dev_dbg(dev,
162*111d746bSScott Branden "sgl 0x%llx handle 0x%llx, sglen: 0x%x sgsize: 0x%x\n",
163*111d746bSScott Branden (u64)dma->sglist,
164*111d746bSScott Branden dma->handle,
165*111d746bSScott Branden dma->sglen,
166*111d746bSScott Branden vkdata->size);
167*111d746bSScott Branden for (i = 0; i < vkdata->size / sizeof(u32); i++)
168*111d746bSScott Branden dev_dbg(dev, "i:0x%x 0x%x\n", i, dma->sglist[i]);
169*111d746bSScott Branden #endif
170*111d746bSScott Branden
171*111d746bSScott Branden return 0;
172*111d746bSScott Branden }
173*111d746bSScott Branden
bcm_vk_sg_alloc(struct device * dev,struct bcm_vk_dma * dma,int dir,struct _vk_data * vkdata,int num)174*111d746bSScott Branden int bcm_vk_sg_alloc(struct device *dev,
175*111d746bSScott Branden struct bcm_vk_dma *dma,
176*111d746bSScott Branden int dir,
177*111d746bSScott Branden struct _vk_data *vkdata,
178*111d746bSScott Branden int num)
179*111d746bSScott Branden {
180*111d746bSScott Branden int i;
181*111d746bSScott Branden int rc = -EINVAL;
182*111d746bSScott Branden
183*111d746bSScott Branden /* Convert user addresses to DMA SG List */
184*111d746bSScott Branden for (i = 0; i < num; i++) {
185*111d746bSScott Branden if (vkdata[i].size && vkdata[i].address) {
186*111d746bSScott Branden /*
187*111d746bSScott Branden * If both size and address are non-zero
188*111d746bSScott Branden * then DMA alloc.
189*111d746bSScott Branden */
190*111d746bSScott Branden rc = bcm_vk_dma_alloc(dev,
191*111d746bSScott Branden &dma[i],
192*111d746bSScott Branden dir,
193*111d746bSScott Branden &vkdata[i]);
194*111d746bSScott Branden } else if (vkdata[i].size ||
195*111d746bSScott Branden vkdata[i].address) {
196*111d746bSScott Branden /*
197*111d746bSScott Branden * If one of size and address are zero
198*111d746bSScott Branden * there is a problem.
199*111d746bSScott Branden */
200*111d746bSScott Branden dev_err(dev,
201*111d746bSScott Branden "Invalid vkdata %x 0x%x 0x%llx\n",
202*111d746bSScott Branden i, vkdata[i].size, vkdata[i].address);
203*111d746bSScott Branden rc = -EINVAL;
204*111d746bSScott Branden } else {
205*111d746bSScott Branden /*
206*111d746bSScott Branden * If size and address are both zero
207*111d746bSScott Branden * don't convert, but return success.
208*111d746bSScott Branden */
209*111d746bSScott Branden rc = 0;
210*111d746bSScott Branden }
211*111d746bSScott Branden
212*111d746bSScott Branden if (rc)
213*111d746bSScott Branden goto fail_alloc;
214*111d746bSScott Branden }
215*111d746bSScott Branden return rc;
216*111d746bSScott Branden
217*111d746bSScott Branden fail_alloc:
218*111d746bSScott Branden while (i > 0) {
219*111d746bSScott Branden i--;
220*111d746bSScott Branden if (dma[i].sglist)
221*111d746bSScott Branden bcm_vk_dma_free(dev, &dma[i]);
222*111d746bSScott Branden }
223*111d746bSScott Branden return rc;
224*111d746bSScott Branden }
225*111d746bSScott Branden
bcm_vk_dma_free(struct device * dev,struct bcm_vk_dma * dma)226*111d746bSScott Branden static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma)
227*111d746bSScott Branden {
228*111d746bSScott Branden dma_addr_t addr;
229*111d746bSScott Branden int i;
230*111d746bSScott Branden int num_sg;
231*111d746bSScott Branden u32 size;
232*111d746bSScott Branden struct _vk_data *vkdata;
233*111d746bSScott Branden
234*111d746bSScott Branden dev_dbg(dev, "free sglist=%p sglen=0x%x\n", dma->sglist, dma->sglen);
235*111d746bSScott Branden
236*111d746bSScott Branden /* Unmap all pages in the sglist */
237*111d746bSScott Branden num_sg = dma->sglist[SGLIST_NUM_SG];
238*111d746bSScott Branden vkdata = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START];
239*111d746bSScott Branden for (i = 0; i < num_sg; i++) {
240*111d746bSScott Branden size = vkdata[i].size;
241*111d746bSScott Branden addr = get_unaligned(&vkdata[i].address);
242*111d746bSScott Branden
243*111d746bSScott Branden dma_unmap_page(dev, addr, size, dma->direction);
244*111d746bSScott Branden }
245*111d746bSScott Branden
246*111d746bSScott Branden /* Free allocated sglist */
247*111d746bSScott Branden dma_free_coherent(dev, dma->sglen, dma->sglist, dma->handle);
248*111d746bSScott Branden
249*111d746bSScott Branden /* Release lock on all pages */
250*111d746bSScott Branden for (i = 0; i < dma->nr_pages; i++)
251*111d746bSScott Branden put_page(dma->pages[i]);
252*111d746bSScott Branden
253*111d746bSScott Branden /* Free allocated dma pages */
254*111d746bSScott Branden kfree(dma->pages);
255*111d746bSScott Branden dma->sglist = NULL;
256*111d746bSScott Branden
257*111d746bSScott Branden return 0;
258*111d746bSScott Branden }
259*111d746bSScott Branden
bcm_vk_sg_free(struct device * dev,struct bcm_vk_dma * dma,int num,int * proc_cnt)260*111d746bSScott Branden int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num,
261*111d746bSScott Branden int *proc_cnt)
262*111d746bSScott Branden {
263*111d746bSScott Branden int i;
264*111d746bSScott Branden
265*111d746bSScott Branden *proc_cnt = 0;
266*111d746bSScott Branden /* Unmap and free all pages and sglists */
267*111d746bSScott Branden for (i = 0; i < num; i++) {
268*111d746bSScott Branden if (dma[i].sglist) {
269*111d746bSScott Branden bcm_vk_dma_free(dev, &dma[i]);
270*111d746bSScott Branden *proc_cnt += 1;
271*111d746bSScott Branden }
272*111d746bSScott Branden }
273*111d746bSScott Branden
274*111d746bSScott Branden return 0;
275*111d746bSScott Branden }
276