xref: /openbmc/linux/drivers/misc/bcm-vk/bcm_vk_sg.c (revision d0034a7a4ac7fae708146ac0059b9c47a1543f0d)
// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright 2018-2020 Broadcom.
 */
#include <linux/dma-mapping.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/pgtable.h>
#include <linux/vmalloc.h>

#include <asm/page.h>
#include <asm/unaligned.h>

#include <uapi/linux/misc/bcm_vk.h>

#include "bcm_vk.h"
#include "bcm_vk_msg.h"
#include "bcm_vk_sg.h"

/*
 * Valkyrie hardware limits a single transfer to 16M,
 * so limit the SGL chunks to 16M.
 */
#define BCM_VK_MAX_SGL_CHUNK SZ_16M
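
/*
 * Illustrative example (not in the original source): with this cap, a
 * physically contiguous 40M region is emitted as three SGL entries of
 * 16M + 16M + 8M rather than a single 40M entry, because
 * bcm_vk_dma_alloc() stops merging pages into an entry once the chunk
 * would exceed BCM_VK_MAX_SGL_CHUNK.
 */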

static int bcm_vk_dma_alloc(struct device *dev,
			    struct bcm_vk_dma *dma,
			    int dir,
			    struct _vk_data *vkdata);
static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma);

/* Uncomment to dump SGLIST */
/* #define BCM_VK_DUMP_SGLIST */

static int bcm_vk_dma_alloc(struct device *dev,
			    struct bcm_vk_dma *dma,
			    int direction,
			    struct _vk_data *vkdata)
{
	dma_addr_t addr, sg_addr;
	int err;
	int i;
	int offset;
	u32 size;
	u32 remaining_size;
	u32 transfer_size;
	u64 data;
	unsigned long first, last;
	struct _vk_data *sgdata;

	/* Get 64-bit user address */
	data = get_unaligned(&vkdata->address);

	/* offset into first page */
	offset = offset_in_page(data);

	/* Calculate number of pages */
	first = (data & PAGE_MASK) >> PAGE_SHIFT;
	last  = ((data + vkdata->size - 1) & PAGE_MASK) >> PAGE_SHIFT;
	dma->nr_pages = last - first + 1;
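	/*
	 * Worked example, assuming 4K pages: data = 0x10000ff0 and
	 * size = 0x20 give offset = 0xff0, first = 0x10000,
	 * last = 0x10001, so nr_pages = 2 even though only 32 bytes
	 * are transferred.
	 */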

	/* Allocate DMA pages */
	dma->pages = kmalloc_array(dma->nr_pages,
				   sizeof(struct page *),
				   GFP_KERNEL);
	if (!dma->pages)
		return -ENOMEM;

	dev_dbg(dev, "Alloc DMA Pages [0x%llx+0x%x => %d pages]\n",
		data, vkdata->size, dma->nr_pages);

	dma->direction = direction;

	/* Pin the user pages in memory */
	err = get_user_pages_fast(data & PAGE_MASK,
				  dma->nr_pages,
				  direction == DMA_FROM_DEVICE,
				  dma->pages);
	if (err != dma->nr_pages) {
		dma->nr_pages = (err >= 0) ? err : 0;
		dev_err(dev, "get_user_pages_fast, err=%d [%d]\n",
			err, dma->nr_pages);
		return err < 0 ? err : -EINVAL;
	}

	/* Maximum sg list size: one entry per mapped page plus header fields */
	dma->sglen = (dma->nr_pages * sizeof(*sgdata)) +
		     (sizeof(u32) * SGLIST_VKDATA_START);

	/* Allocate sglist */
	dma->sglist = dma_alloc_coherent(dev,
					 dma->sglen,
					 &dma->handle,
					 GFP_KERNEL);
	if (!dma->sglist)
		return -ENOMEM;

	dma->sglist[SGLIST_NUM_SG] = 0;
	dma->sglist[SGLIST_TOTALSIZE] = vkdata->size;
	remaining_size = vkdata->size;
	sgdata = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START];
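
	/*
	 * Sketch of the sglist layout being built here, assuming the uapi
	 * values SGLIST_NUM_SG = 0, SGLIST_TOTALSIZE = 1 and
	 * SGLIST_VKDATA_START = 2:
	 *
	 *	sglist[0]	number of SG entries written below
	 *	sglist[1]	total transfer size in bytes
	 *	sglist[2]...	packed struct _vk_data entries,
	 *			{u32 size; u64 address}, 12 bytes each
	 */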

	/* Map all pages into DMA */
	size = min_t(size_t, PAGE_SIZE - offset, remaining_size);
	remaining_size -= size;
	sg_addr = dma_map_page(dev,
			       dma->pages[0],
			       offset,
			       size,
			       dma->direction);
	transfer_size = size;
	if (unlikely(dma_mapping_error(dev, sg_addr))) {
		__free_page(dma->pages[0]);
		return -EIO;
	}

	for (i = 1; i < dma->nr_pages; i++) {
		size = min_t(size_t, PAGE_SIZE, remaining_size);
		remaining_size -= size;
		addr = dma_map_page(dev,
				    dma->pages[i],
				    0,
				    size,
				    dma->direction);
		if (unlikely(dma_mapping_error(dev, addr))) {
			__free_page(dma->pages[i]);
			return -EIO;
		}

		/*
		 * Compress SG list entry when pages are contiguous
		 * and the transfer size is less than or equal to
		 * BCM_VK_MAX_SGL_CHUNK
		 */
		if ((addr == (sg_addr + transfer_size)) &&
		    ((transfer_size + size) <= BCM_VK_MAX_SGL_CHUNK)) {
			/* pages are contiguous, add to same sg entry */
			transfer_size += size;
		} else {
			/* pages are not contiguous, write sg entry */
			sgdata->size = transfer_size;
			put_unaligned(sg_addr, (u64 *)&sgdata->address);
			dma->sglist[SGLIST_NUM_SG]++;

			/* start new sg entry */
			sgdata++;
			sg_addr = addr;
			transfer_size = size;
		}
	}
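
	/*
	 * Example of the merging above, assuming 4K pages: pages mapped
	 * back to back at bus addresses 0x8000 and 0x9000 collapse into
	 * one 8K entry at 0x8000, while a page at 0xb000 would close the
	 * current entry and start a new one at 0xb000.
	 */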
	/* Write last sg list entry */
	sgdata->size = transfer_size;
	put_unaligned(sg_addr, (u64 *)&sgdata->address);
	dma->sglist[SGLIST_NUM_SG]++;

	/* Update pointers and size field to point to sglist */
	put_unaligned((u64)dma->handle, &vkdata->address);
	vkdata->size = (dma->sglist[SGLIST_NUM_SG] * sizeof(*sgdata)) +
		       (sizeof(u32) * SGLIST_VKDATA_START);
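
	/*
	 * Worked example, assuming the 12-byte packed _vk_data and the
	 * two-word header above: three SG entries give
	 * vkdata->size = 3 * 12 + 2 * 4 = 44 bytes.
	 */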

#ifdef BCM_VK_DUMP_SGLIST
	dev_dbg(dev,
		"sgl 0x%llx handle 0x%llx, sglen: 0x%x sgsize: 0x%x\n",
		(u64)dma->sglist,
		dma->handle,
		dma->sglen,
		vkdata->size);
	for (i = 0; i < vkdata->size / sizeof(u32); i++)
		dev_dbg(dev, "i:0x%x 0x%x\n", i, dma->sglist[i]);
#endif

	return 0;
}

int bcm_vk_sg_alloc(struct device *dev,
		    struct bcm_vk_dma *dma,
		    int dir,
		    struct _vk_data *vkdata,
		    int num)
{
	int i;
	int rc = -EINVAL;

	/* Convert user addresses to DMA SG List */
	for (i = 0; i < num; i++) {
		if (vkdata[i].size && vkdata[i].address) {
			/*
			 * If both size and address are non-zero
			 * then DMA alloc.
			 */
			rc = bcm_vk_dma_alloc(dev,
					      &dma[i],
					      dir,
					      &vkdata[i]);
		} else if (vkdata[i].size ||
			   vkdata[i].address) {
			/*
			 * If only one of size and address is zero,
			 * there is a problem.
			 */
			dev_err(dev,
				"Invalid vkdata %x 0x%x 0x%llx\n",
				i, vkdata[i].size, vkdata[i].address);
			rc = -EINVAL;
		} else {
			/*
			 * If size and address are both zero,
			 * don't convert, but return success.
			 */
			rc = 0;
		}

		if (rc)
			goto fail_alloc;
	}
	return rc;

fail_alloc:
	while (i > 0) {
		i--;
		if (dma[i].sglist)
			bcm_vk_dma_free(dev, &dma[i]);
	}
	return rc;
}

static int bcm_vk_dma_free(struct device *dev, struct bcm_vk_dma *dma)
{
	dma_addr_t addr;
	int i;
	int num_sg;
	u32 size;
	struct _vk_data *vkdata;

	dev_dbg(dev, "free sglist=%p sglen=0x%x\n", dma->sglist, dma->sglen);

	/* Unmap all pages in the sglist */
	num_sg = dma->sglist[SGLIST_NUM_SG];
	vkdata = (struct _vk_data *)&dma->sglist[SGLIST_VKDATA_START];
	for (i = 0; i < num_sg; i++) {
		size = vkdata[i].size;
		addr = get_unaligned(&vkdata[i].address);

		dma_unmap_page(dev, addr, size, dma->direction);
	}

	/* Free allocated sglist */
	dma_free_coherent(dev, dma->sglen, dma->sglist, dma->handle);

	/* Drop the page references taken by get_user_pages_fast() */
	for (i = 0; i < dma->nr_pages; i++)
		put_page(dma->pages[i]);

	/* Free allocated dma pages */
	kfree(dma->pages);
	dma->sglist = NULL;

	return 0;
}

int bcm_vk_sg_free(struct device *dev, struct bcm_vk_dma *dma, int num,
		   int *proc_cnt)
{
	int i;

	*proc_cnt = 0;
	/* Unmap and free all pages and sglists */
	for (i = 0; i < num; i++) {
		if (dma[i].sglist) {
			bcm_vk_dma_free(dev, &dma[i]);
			*proc_cnt += 1;
		}
	}

	return 0;
}
276