xref: /openbmc/linux/drivers/infiniband/sw/rxe/rxe_mr.c (revision 060f35a317ef09101b128f399dce7ed13d019461)
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */
68700e3e7SMoni Shoua 
7ea1bb00eSLi Zhijian #include <linux/libnvdimm.h>
8ea1bb00eSLi Zhijian 
98700e3e7SMoni Shoua #include "rxe.h"
108700e3e7SMoni Shoua #include "rxe_loc.h"
118700e3e7SMoni Shoua 
12beec0239SBob Pearson /* Return a random 8 bit key value that is
13beec0239SBob Pearson  * different than the last_key. Set last_key to -1
14beec0239SBob Pearson  * if this is the first key for an MR or MW
158700e3e7SMoni Shoua  */
rxe_get_next_key(u32 last_key)16beec0239SBob Pearson u8 rxe_get_next_key(u32 last_key)
178700e3e7SMoni Shoua {
18beec0239SBob Pearson 	u8 key;
198700e3e7SMoni Shoua 
20beec0239SBob Pearson 	do {
21beec0239SBob Pearson 		get_random_bytes(&key, 1);
22beec0239SBob Pearson 	} while (key == last_key);
238700e3e7SMoni Shoua 
248700e3e7SMoni Shoua 	return key;
258700e3e7SMoni Shoua }
268700e3e7SMoni Shoua 
/* Check that [iova, iova + length) lies inside the range the MR covers.
 *
 * DMA MRs are not range limited and always pass. For user and
 * fast-reg MRs the request must start at or after mr->ibmr.iova and
 * end at or before mr->ibmr.iova + mr->ibmr.length.
 *
 * The comparison never computes iova + length, which can wrap around
 * in 64 bit arithmetic and make an out of range request look valid.
 *
 * Returns 0 if the range is acceptable, -EINVAL if it is out of range
 * or the MR type is not one of the handled types.
 */
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->ibmr.type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		/* The first two tests guarantee that neither subtraction
		 * in the third test can underflow.
		 */
		if (iova < mr->ibmr.iova ||
		    length > mr->ibmr.length ||
		    iova - mr->ibmr.iova > mr->ibmr.length - length) {
			rxe_dbg_mr(mr, "iova/length out of range\n");
			return -EINVAL;
		}
		return 0;

	default:
		rxe_dbg_mr(mr, "mr type not supported\n");
		return -EINVAL;
	}
}
478700e3e7SMoni Shoua 
rxe_mr_init(int access,struct rxe_mr * mr)48364e282cSBob Pearson static void rxe_mr_init(int access, struct rxe_mr *mr)
498700e3e7SMoni Shoua {
5086a3fb55SBob Pearson 	u32 key = mr->elem.index << 8 | rxe_get_next_key(-1);
518700e3e7SMoni Shoua 
5200134533SBob Pearson 	/* set ibmr->l/rkey and also copy into private l/rkey
5300134533SBob Pearson 	 * for user MRs these will always be the same
5400134533SBob Pearson 	 * for cases where caller 'owns' the key portion
5500134533SBob Pearson 	 * they may be different until REG_MR WQE is executed.
5600134533SBob Pearson 	 */
5786a3fb55SBob Pearson 	mr->lkey = mr->ibmr.lkey = key;
5886a3fb55SBob Pearson 	mr->rkey = mr->ibmr.rkey = key;
5900134533SBob Pearson 
60592627ccSBob Pearson 	mr->access = access;
61325a7eb8SBob Pearson 	mr->ibmr.page_size = PAGE_SIZE;
62325a7eb8SBob Pearson 	mr->page_mask = PAGE_MASK;
63325a7eb8SBob Pearson 	mr->page_shift = PAGE_SHIFT;
64364e282cSBob Pearson 	mr->state = RXE_MR_STATE_INVALID;
658700e3e7SMoni Shoua }
668700e3e7SMoni Shoua 
rxe_mr_init_dma(int access,struct rxe_mr * mr)6758651bbbSBob Pearson void rxe_mr_init_dma(int access, struct rxe_mr *mr)
688700e3e7SMoni Shoua {
69364e282cSBob Pearson 	rxe_mr_init(access, mr);
708700e3e7SMoni Shoua 
71364e282cSBob Pearson 	mr->state = RXE_MR_STATE_VALID;
7271d23639Syangx.jy@fujitsu.com 	mr->ibmr.type = IB_MR_TYPE_DMA;
738700e3e7SMoni Shoua }
748700e3e7SMoni Shoua 
/* Convert an iova into an index into mr->page_list: the page number
 * of iova minus the page number of the MR's starting iova, both
 * computed with mr->page_shift.
 */
static unsigned long rxe_mr_iova_to_index(struct rxe_mr *mr, u64 iova)
{
	u64 first_page = mr->ibmr.iova >> mr->page_shift;
	u64 this_page = iova >> mr->page_shift;

	return this_page - first_page;
}
79592627ccSBob Pearson 
/* Return the offset of iova within its MR page, i.e. iova masked
 * with (mr_page_size(mr) - 1).
 */
static unsigned long rxe_mr_iova_to_page_offset(struct rxe_mr *mr, u64 iova)
{
	u64 offset_mask = mr_page_size(mr) - 1;

	return iova & offset_mask;
}
84592627ccSBob Pearson 
is_pmem_page(struct page * pg)8502ea0a51SLi Zhijian static bool is_pmem_page(struct page *pg)
8602ea0a51SLi Zhijian {
8702ea0a51SLi Zhijian 	unsigned long paddr = page_to_phys(pg);
8802ea0a51SLi Zhijian 
8902ea0a51SLi Zhijian 	return REGION_INTERSECTS ==
9002ea0a51SLi Zhijian 	       region_intersects(paddr, PAGE_SIZE, IORESOURCE_MEM,
9102ea0a51SLi Zhijian 				 IORES_DESC_PERSISTENT_MEMORY);
9202ea0a51SLi Zhijian }
9302ea0a51SLi Zhijian 
rxe_mr_fill_pages_from_sgt(struct rxe_mr * mr,struct sg_table * sgt)94592627ccSBob Pearson static int rxe_mr_fill_pages_from_sgt(struct rxe_mr *mr, struct sg_table *sgt)
95592627ccSBob Pearson {
96592627ccSBob Pearson 	XA_STATE(xas, &mr->page_list, 0);
97592627ccSBob Pearson 	struct sg_page_iter sg_iter;
98592627ccSBob Pearson 	struct page *page;
99592627ccSBob Pearson 	bool persistent = !!(mr->access & IB_ACCESS_FLUSH_PERSISTENT);
100592627ccSBob Pearson 
101592627ccSBob Pearson 	__sg_page_iter_start(&sg_iter, sgt->sgl, sgt->orig_nents, 0);
102592627ccSBob Pearson 	if (!__sg_page_iter_next(&sg_iter))
103592627ccSBob Pearson 		return 0;
104592627ccSBob Pearson 
105592627ccSBob Pearson 	do {
106592627ccSBob Pearson 		xas_lock(&xas);
107592627ccSBob Pearson 		while (true) {
108592627ccSBob Pearson 			page = sg_page_iter_page(&sg_iter);
109592627ccSBob Pearson 
110592627ccSBob Pearson 			if (persistent && !is_pmem_page(page)) {
111592627ccSBob Pearson 				rxe_dbg_mr(mr, "Page can't be persistent\n");
112592627ccSBob Pearson 				xas_set_err(&xas, -EINVAL);
113592627ccSBob Pearson 				break;
114592627ccSBob Pearson 			}
115592627ccSBob Pearson 
116592627ccSBob Pearson 			xas_store(&xas, page);
117592627ccSBob Pearson 			if (xas_error(&xas))
118592627ccSBob Pearson 				break;
119592627ccSBob Pearson 			xas_next(&xas);
120592627ccSBob Pearson 			if (!__sg_page_iter_next(&sg_iter))
121592627ccSBob Pearson 				break;
122592627ccSBob Pearson 		}
123592627ccSBob Pearson 		xas_unlock(&xas);
124592627ccSBob Pearson 	} while (xas_nomem(&xas, GFP_KERNEL));
125592627ccSBob Pearson 
126592627ccSBob Pearson 	return xas_error(&xas);
127592627ccSBob Pearson }
128592627ccSBob Pearson 
/* Initialise mr as a user MR covering [start, start + length).
 *
 * Gets the umem for the range with ib_umem_get() and records its
 * pages in mr->page_list. On success the MR owns the umem, has type
 * IB_MR_TYPE_USER and is VALID.
 *
 * Returns 0 on success, the ib_umem_get() error if that call fails,
 * or the error from filling the page list (the umem is released
 * before returning in that case).
 */
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct ib_umem *umem;
	int err;

	rxe_mr_init(access, mr);

	xa_init(&mr->page_list);

	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
	if (IS_ERR(umem)) {
		err = PTR_ERR(umem);
		rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n", err);
		return err;
	}

	err = rxe_mr_fill_pages_from_sgt(mr, &umem->sgt_append.sgt);
	if (err) {
		ib_umem_release(umem);
		return err;
	}

	mr->state = RXE_MR_STATE_VALID;
	mr->ibmr.type = IB_MR_TYPE_USER;
	mr->umem = umem;

	return 0;
}
1588700e3e7SMoni Shoua 
rxe_mr_alloc(struct rxe_mr * mr,int num_buf)159592627ccSBob Pearson static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
160592627ccSBob Pearson {
161592627ccSBob Pearson 	XA_STATE(xas, &mr->page_list, 0);
162592627ccSBob Pearson 	int i = 0;
163592627ccSBob Pearson 	int err;
164592627ccSBob Pearson 
165592627ccSBob Pearson 	xa_init(&mr->page_list);
166592627ccSBob Pearson 
167592627ccSBob Pearson 	do {
168592627ccSBob Pearson 		xas_lock(&xas);
169592627ccSBob Pearson 		while (i != num_buf) {
170592627ccSBob Pearson 			xas_store(&xas, XA_ZERO_ENTRY);
171592627ccSBob Pearson 			if (xas_error(&xas))
172592627ccSBob Pearson 				break;
173592627ccSBob Pearson 			xas_next(&xas);
174592627ccSBob Pearson 			i++;
175592627ccSBob Pearson 		}
176592627ccSBob Pearson 		xas_unlock(&xas);
177592627ccSBob Pearson 	} while (xas_nomem(&xas, GFP_KERNEL));
178592627ccSBob Pearson 
179592627ccSBob Pearson 	err = xas_error(&xas);
180592627ccSBob Pearson 	if (err)
1818700e3e7SMoni Shoua 		return err;
182592627ccSBob Pearson 
183592627ccSBob Pearson 	mr->num_buf = num_buf;
184592627ccSBob Pearson 
185592627ccSBob Pearson 	return 0;
1868700e3e7SMoni Shoua }
1878700e3e7SMoni Shoua 
rxe_mr_init_fast(int max_pages,struct rxe_mr * mr)18858651bbbSBob Pearson int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
1898700e3e7SMoni Shoua {
1908700e3e7SMoni Shoua 	int err;
1918700e3e7SMoni Shoua 
19200134533SBob Pearson 	/* always allow remote access for FMRs */
193d11442c6SBob Pearson 	rxe_mr_init(RXE_ACCESS_REMOTE, mr);
1948700e3e7SMoni Shoua 
1951e755506SLi Zhijian 	err = rxe_mr_alloc(mr, max_pages);
1968700e3e7SMoni Shoua 	if (err)
1978700e3e7SMoni Shoua 		goto err1;
1988700e3e7SMoni Shoua 
199364e282cSBob Pearson 	mr->state = RXE_MR_STATE_FREE;
20071d23639Syangx.jy@fujitsu.com 	mr->ibmr.type = IB_MR_TYPE_MEM_REG;
2018700e3e7SMoni Shoua 
2028700e3e7SMoni Shoua 	return 0;
2038700e3e7SMoni Shoua 
2048700e3e7SMoni Shoua err1:
2058700e3e7SMoni Shoua 	return err;
2068700e3e7SMoni Shoua }
2078700e3e7SMoni Shoua 
/* Callback passed to ib_sg_to_pages() by rxe_map_mr_sg(): record the
 * page for dma_addr as the next entry (index mr->nbuf) of
 * mr->page_list.
 *
 * Returns -EINVAL if the MR has IB_ACCESS_FLUSH_PERSISTENT set and
 * the page fails is_pmem_page(), -ENOMEM if mr->nbuf has already
 * reached mr->num_buf, the xa_store() error if the store fails, and
 * 0 otherwise.
 */
static int rxe_set_page(struct ib_mr *ibmr, u64 dma_addr)
{
	struct rxe_mr *mr = to_rmr(ibmr);
	struct page *page = ib_virt_dma_to_page(dma_addr);
	int err;

	if (mr->access & IB_ACCESS_FLUSH_PERSISTENT) {
		if (!is_pmem_page(page)) {
			rxe_dbg_mr(mr, "Page cannot be persistent\n");
			return -EINVAL;
		}
	}

	/* all reserved slots are already in use */
	if (unlikely(mr->nbuf == mr->num_buf))
		return -ENOMEM;

	err = xa_err(xa_store(&mr->page_list, mr->nbuf, page, GFP_KERNEL));
	if (err)
		return err;

	mr->nbuf++;
	return 0;
}
230db4729a5SBob Pearson 
rxe_map_mr_sg(struct ib_mr * ibmr,struct scatterlist * sgl,int sg_nents,unsigned int * sg_offset)231592627ccSBob Pearson int rxe_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sgl,
232db4729a5SBob Pearson 		  int sg_nents, unsigned int *sg_offset)
233db4729a5SBob Pearson {
234db4729a5SBob Pearson 	struct rxe_mr *mr = to_rmr(ibmr);
235325a7eb8SBob Pearson 	unsigned int page_size = mr_page_size(mr);
236325a7eb8SBob Pearson 
237592627ccSBob Pearson 	mr->nbuf = 0;
238325a7eb8SBob Pearson 	mr->page_shift = ilog2(page_size);
239325a7eb8SBob Pearson 	mr->page_mask = ~((u64)page_size - 1);
240592627ccSBob Pearson 	mr->page_offset = mr->ibmr.iova & (page_size - 1);
241db4729a5SBob Pearson 
242592627ccSBob Pearson 	return ib_sg_to_pages(ibmr, sgl, sg_nents, sg_offset, rxe_set_page);
243db4729a5SBob Pearson }
244db4729a5SBob Pearson 
/* Copy length bytes between addr and the MR pages held in
 * mr->page_list, starting at iova.
 *
 * dir == RXE_FROM_MR_OBJ copies from the MR to addr; any other value
 * copies from addr to the MR. The copy proceeds one MR page at a
 * time; only the first page can start at a non-zero offset.
 *
 * Returns 0 on success or -EFAULT if a needed page is not present
 * in the page list.
 */
static int rxe_mr_copy_xarray(struct rxe_mr *mr, u64 iova, void *addr,
			      unsigned int length, enum rxe_mr_copy_dir dir)
{
	unsigned int page_offset = rxe_mr_iova_to_page_offset(mr, iova);
	unsigned long index = rxe_mr_iova_to_index(mr, iova);
	unsigned int bytes;
	struct page *page;
	void *mr_addr;
	void *va;

	for (; length; index++) {
		page = xa_load(&mr->page_list, index);
		if (!page)
			return -EFAULT;

		/* never run past the end of the current page */
		bytes = min_t(unsigned int, length,
				mr_page_size(mr) - page_offset);

		va = kmap_local_page(page);
		mr_addr = va + page_offset;
		if (dir == RXE_FROM_MR_OBJ)
			memcpy(addr, mr_addr, bytes);
		else
			memcpy(mr_addr, addr, bytes);
		kunmap_local(va);

		length -= bytes;
		addr += bytes;
		page_offset = 0;
	}

	return 0;
}
276ea1bb00eSLi Zhijian 
/* Copy length bytes between addr and the memory at dma_addr for a
 * DMA MR.
 *
 * dir == RXE_TO_MR_OBJ copies from addr to the memory at dma_addr;
 * any other value copies in the opposite direction. Pages are found
 * with ib_virt_dma_to_page() and handled PAGE_SIZE at a time; only
 * the first page can start at a non-zero offset.
 */
static void rxe_mr_copy_dma(struct rxe_mr *mr, u64 dma_addr, void *addr,
			    unsigned int length, enum rxe_mr_copy_dir dir)
{
	unsigned int page_offset = dma_addr & (PAGE_SIZE - 1);
	unsigned int bytes;
	struct page *page;
	u8 *mr_addr;
	u8 *va;

	for (; length; length -= bytes) {
		page = ib_virt_dma_to_page(dma_addr);

		/* never run past the end of the current page */
		bytes = min_t(unsigned int, length,
				PAGE_SIZE - page_offset);

		va = kmap_local_page(page);
		mr_addr = va + page_offset;
		if (dir == RXE_TO_MR_OBJ)
			memcpy(mr_addr, addr, bytes);
		else
			memcpy(addr, mr_addr, bytes);
		kunmap_local(va);

		dma_addr += bytes;
		addr += bytes;
		page_offset = 0;
	}
}
303592627ccSBob Pearson 
/* Copy length bytes between addr and the MR at iova, in the
 * direction given by dir.
 *
 * A zero length copy succeeds without looking at mr. DMA MRs are
 * copied directly with no range check; all other MRs are range
 * checked and then copied through the page list.
 *
 * Returns 0 on success, -EINVAL if mr is NULL (with a warning), the
 * mr_check_range() error if the range is rejected, or the error
 * from rxe_mr_copy_xarray().
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr,
		unsigned int length, enum rxe_mr_copy_dir dir)
{
	int err;

	if (!length)
		return 0;

	if (WARN_ON(!mr))
		return -EINVAL;

	if (mr->ibmr.type != IB_MR_TYPE_DMA) {
		err = mr_check_range(mr, iova, length);
		if (unlikely(err)) {
			rxe_dbg_mr(mr, "iova out of range\n");
			return err;
		}

		return rxe_mr_copy_xarray(mr, iova, addr, length, dir);
	}

	rxe_mr_copy_dma(mr, iova, addr, length, dir);
	return 0;
}
328592627ccSBob Pearson 
3298700e3e7SMoni Shoua /* copy data in or out of a wqe, i.e. sg list
3308700e3e7SMoni Shoua  * under the control of a dma descriptor
3318700e3e7SMoni Shoua  */
copy_data(struct rxe_pd * pd,int access,struct rxe_dma_info * dma,void * addr,int length,enum rxe_mr_copy_dir dir)3328700e3e7SMoni Shoua int copy_data(
3338700e3e7SMoni Shoua 	struct rxe_pd		*pd,
3348700e3e7SMoni Shoua 	int			access,
3358700e3e7SMoni Shoua 	struct rxe_dma_info	*dma,
3368700e3e7SMoni Shoua 	void			*addr,
3378700e3e7SMoni Shoua 	int			length,
3381117f26eSBob Pearson 	enum rxe_mr_copy_dir	dir)
3398700e3e7SMoni Shoua {
3408700e3e7SMoni Shoua 	int			bytes;
3418700e3e7SMoni Shoua 	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
3428700e3e7SMoni Shoua 	int			offset	= dma->sge_offset;
3438700e3e7SMoni Shoua 	int			resid	= dma->resid;
344364e282cSBob Pearson 	struct rxe_mr		*mr	= NULL;
3458700e3e7SMoni Shoua 	u64			iova;
3468700e3e7SMoni Shoua 	int			err;
3478700e3e7SMoni Shoua 
3488700e3e7SMoni Shoua 	if (length == 0)
3498700e3e7SMoni Shoua 		return 0;
3508700e3e7SMoni Shoua 
3518700e3e7SMoni Shoua 	if (length > resid) {
3528700e3e7SMoni Shoua 		err = -EINVAL;
3538700e3e7SMoni Shoua 		goto err2;
3548700e3e7SMoni Shoua 	}
3558700e3e7SMoni Shoua 
3568700e3e7SMoni Shoua 	if (sge->length && (offset < sge->length)) {
3573902b429SBob Pearson 		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
358364e282cSBob Pearson 		if (!mr) {
3598700e3e7SMoni Shoua 			err = -EINVAL;
3608700e3e7SMoni Shoua 			goto err1;
3618700e3e7SMoni Shoua 		}
3628700e3e7SMoni Shoua 	}
3638700e3e7SMoni Shoua 
3648700e3e7SMoni Shoua 	while (length > 0) {
3658700e3e7SMoni Shoua 		bytes = length;
3668700e3e7SMoni Shoua 
3678700e3e7SMoni Shoua 		if (offset >= sge->length) {
368364e282cSBob Pearson 			if (mr) {
3693197706aSBob Pearson 				rxe_put(mr);
370364e282cSBob Pearson 				mr = NULL;
3718700e3e7SMoni Shoua 			}
3728700e3e7SMoni Shoua 			sge++;
3738700e3e7SMoni Shoua 			dma->cur_sge++;
3748700e3e7SMoni Shoua 			offset = 0;
3758700e3e7SMoni Shoua 
3768700e3e7SMoni Shoua 			if (dma->cur_sge >= dma->num_sge) {
3778700e3e7SMoni Shoua 				err = -ENOSPC;
3788700e3e7SMoni Shoua 				goto err2;
3798700e3e7SMoni Shoua 			}
3808700e3e7SMoni Shoua 
3818700e3e7SMoni Shoua 			if (sge->length) {
382364e282cSBob Pearson 				mr = lookup_mr(pd, access, sge->lkey,
3833902b429SBob Pearson 					       RXE_LOOKUP_LOCAL);
384364e282cSBob Pearson 				if (!mr) {
3858700e3e7SMoni Shoua 					err = -EINVAL;
3868700e3e7SMoni Shoua 					goto err1;
3878700e3e7SMoni Shoua 				}
3888700e3e7SMoni Shoua 			} else {
3898700e3e7SMoni Shoua 				continue;
3908700e3e7SMoni Shoua 			}
3918700e3e7SMoni Shoua 		}
3928700e3e7SMoni Shoua 
3938700e3e7SMoni Shoua 		if (bytes > sge->length - offset)
3948700e3e7SMoni Shoua 			bytes = sge->length - offset;
3958700e3e7SMoni Shoua 
3968700e3e7SMoni Shoua 		if (bytes > 0) {
3978700e3e7SMoni Shoua 			iova = sge->addr + offset;
3981117f26eSBob Pearson 			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
3998700e3e7SMoni Shoua 			if (err)
4008700e3e7SMoni Shoua 				goto err2;
4018700e3e7SMoni Shoua 
4028700e3e7SMoni Shoua 			offset	+= bytes;
4038700e3e7SMoni Shoua 			resid	-= bytes;
4048700e3e7SMoni Shoua 			length	-= bytes;
4058700e3e7SMoni Shoua 			addr	+= bytes;
4068700e3e7SMoni Shoua 		}
4078700e3e7SMoni Shoua 	}
4088700e3e7SMoni Shoua 
4098700e3e7SMoni Shoua 	dma->sge_offset = offset;
4108700e3e7SMoni Shoua 	dma->resid	= resid;
4118700e3e7SMoni Shoua 
412364e282cSBob Pearson 	if (mr)
4133197706aSBob Pearson 		rxe_put(mr);
4148700e3e7SMoni Shoua 
4158700e3e7SMoni Shoua 	return 0;
4168700e3e7SMoni Shoua 
4178700e3e7SMoni Shoua err2:
418364e282cSBob Pearson 	if (mr)
4193197706aSBob Pearson 		rxe_put(mr);
4208700e3e7SMoni Shoua err1:
4218700e3e7SMoni Shoua 	return err;
4228700e3e7SMoni Shoua }
4238700e3e7SMoni Shoua 
/* Write back the CPU cache for length bytes of the MR starting at
 * iova, by calling arch_wb_cache_pmem() on each page in turn.
 *
 * Returns 0 on success (including length == 0), -EINVAL if mr is
 * NULL (with a warning), -EFAULT for a DMA MR or a page missing from
 * the page list, or the mr_check_range() error.
 */
int rxe_flush_pmem_iova(struct rxe_mr *mr, u64 iova, unsigned int length)
{
	unsigned int page_offset;
	unsigned long index;
	struct page *page;
	unsigned int bytes;
	int err;
	u8 *va;

	/* mr must be valid even if length is zero */
	if (WARN_ON(!mr))
		return -EINVAL;

	if (!length)
		return 0;

	if (mr->ibmr.type == IB_MR_TYPE_DMA)
		return -EFAULT;

	err = mr_check_range(mr, iova, length);
	if (err)
		return err;

	/* index and offset are recomputed from iova on every pass */
	for (; length > 0; length -= bytes, iova += bytes) {
		index = rxe_mr_iova_to_index(mr, iova);
		page = xa_load(&mr->page_list, index);
		if (!page)
			return -EFAULT;

		page_offset = rxe_mr_iova_to_page_offset(mr, iova);
		bytes = min_t(unsigned int, length,
				mr_page_size(mr) - page_offset);

		va = kmap_local_page(page);
		arch_wb_cache_pmem(va + page_offset, bytes);
		kunmap_local(va);
	}

	return 0;
}
467592627ccSBob Pearson 
468f04d5b3dSBob Pearson /* Guarantee atomicity of atomic operations at the machine level. */
469f04d5b3dSBob Pearson static DEFINE_SPINLOCK(atomic_ops_lock);
470f04d5b3dSBob Pearson 
rxe_mr_do_atomic_op(struct rxe_mr * mr,u64 iova,int opcode,u64 compare,u64 swap_add,u64 * orig_val)471f04d5b3dSBob Pearson int rxe_mr_do_atomic_op(struct rxe_mr *mr, u64 iova, int opcode,
472f04d5b3dSBob Pearson 			u64 compare, u64 swap_add, u64 *orig_val)
473f04d5b3dSBob Pearson {
474592627ccSBob Pearson 	unsigned int page_offset;
475592627ccSBob Pearson 	struct page *page;
476f04d5b3dSBob Pearson 	u64 value;
477592627ccSBob Pearson 	u64 *va;
478f04d5b3dSBob Pearson 
479592627ccSBob Pearson 	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
480*5dfd5a88SLi Zhijian 		rxe_dbg_mr(mr, "mr not in valid state\n");
481f04d5b3dSBob Pearson 		return RESPST_ERR_RKEY_VIOLATION;
482f04d5b3dSBob Pearson 	}
483f04d5b3dSBob Pearson 
484592627ccSBob Pearson 	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
485592627ccSBob Pearson 		page_offset = iova & (PAGE_SIZE - 1);
4868d7c7c0eSJason Gunthorpe 		page = ib_virt_dma_to_page(iova);
487592627ccSBob Pearson 	} else {
488592627ccSBob Pearson 		unsigned long index;
489592627ccSBob Pearson 		int err;
490592627ccSBob Pearson 
491592627ccSBob Pearson 		err = mr_check_range(mr, iova, sizeof(value));
492592627ccSBob Pearson 		if (err) {
493*5dfd5a88SLi Zhijian 			rxe_dbg_mr(mr, "iova out of range\n");
494f04d5b3dSBob Pearson 			return RESPST_ERR_RKEY_VIOLATION;
495f04d5b3dSBob Pearson 		}
496592627ccSBob Pearson 		page_offset = rxe_mr_iova_to_page_offset(mr, iova);
497592627ccSBob Pearson 		index = rxe_mr_iova_to_index(mr, iova);
498592627ccSBob Pearson 		page = xa_load(&mr->page_list, index);
499592627ccSBob Pearson 		if (!page)
500592627ccSBob Pearson 			return RESPST_ERR_RKEY_VIOLATION;
501592627ccSBob Pearson 	}
502f04d5b3dSBob Pearson 
503592627ccSBob Pearson 	if (unlikely(page_offset & 0x7)) {
504*5dfd5a88SLi Zhijian 		rxe_dbg_mr(mr, "iova not aligned\n");
505f04d5b3dSBob Pearson 		return RESPST_ERR_MISALIGNED_ATOMIC;
506f04d5b3dSBob Pearson 	}
507f04d5b3dSBob Pearson 
508592627ccSBob Pearson 	va = kmap_local_page(page);
509592627ccSBob Pearson 
510f04d5b3dSBob Pearson 	spin_lock_bh(&atomic_ops_lock);
511592627ccSBob Pearson 	value = *orig_val = va[page_offset >> 3];
512f04d5b3dSBob Pearson 
513f04d5b3dSBob Pearson 	if (opcode == IB_OPCODE_RC_COMPARE_SWAP) {
514f04d5b3dSBob Pearson 		if (value == compare)
515592627ccSBob Pearson 			va[page_offset >> 3] = swap_add;
516f04d5b3dSBob Pearson 	} else {
517f04d5b3dSBob Pearson 		value += swap_add;
518592627ccSBob Pearson 		va[page_offset >> 3] = value;
519f04d5b3dSBob Pearson 	}
520f04d5b3dSBob Pearson 	spin_unlock_bh(&atomic_ops_lock);
521f04d5b3dSBob Pearson 
522592627ccSBob Pearson 	kunmap_local(va);
523592627ccSBob Pearson 
524f04d5b3dSBob Pearson 	return 0;
525f04d5b3dSBob Pearson }
526f04d5b3dSBob Pearson 
527d8bdb0ebSBob Pearson #if defined CONFIG_64BIT
528592627ccSBob Pearson /* only implemented or called for 64 bit architectures */
rxe_mr_do_atomic_write(struct rxe_mr * mr,u64 iova,u64 value)529d8bdb0ebSBob Pearson int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
530d8bdb0ebSBob Pearson {
531592627ccSBob Pearson 	unsigned int page_offset;
532592627ccSBob Pearson 	struct page *page;
533d8bdb0ebSBob Pearson 	u64 *va;
534d8bdb0ebSBob Pearson 
535d8bdb0ebSBob Pearson 	/* See IBA oA19-28 */
536d8bdb0ebSBob Pearson 	if (unlikely(mr->state != RXE_MR_STATE_VALID)) {
537*5dfd5a88SLi Zhijian 		rxe_dbg_mr(mr, "mr not in valid state\n");
538d8bdb0ebSBob Pearson 		return RESPST_ERR_RKEY_VIOLATION;
539d8bdb0ebSBob Pearson 	}
540d8bdb0ebSBob Pearson 
541592627ccSBob Pearson 	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
542592627ccSBob Pearson 		page_offset = iova & (PAGE_SIZE - 1);
5438d7c7c0eSJason Gunthorpe 		page = ib_virt_dma_to_page(iova);
544592627ccSBob Pearson 	} else {
545592627ccSBob Pearson 		unsigned long index;
546592627ccSBob Pearson 		int err;
547592627ccSBob Pearson 
548592627ccSBob Pearson 		/* See IBA oA19-28 */
549592627ccSBob Pearson 		err = mr_check_range(mr, iova, sizeof(value));
550592627ccSBob Pearson 		if (unlikely(err)) {
551*5dfd5a88SLi Zhijian 			rxe_dbg_mr(mr, "iova out of range\n");
552d8bdb0ebSBob Pearson 			return RESPST_ERR_RKEY_VIOLATION;
553d8bdb0ebSBob Pearson 		}
554592627ccSBob Pearson 		page_offset = rxe_mr_iova_to_page_offset(mr, iova);
555592627ccSBob Pearson 		index = rxe_mr_iova_to_index(mr, iova);
556592627ccSBob Pearson 		page = xa_load(&mr->page_list, index);
557592627ccSBob Pearson 		if (!page)
558592627ccSBob Pearson 			return RESPST_ERR_RKEY_VIOLATION;
559592627ccSBob Pearson 	}
560d8bdb0ebSBob Pearson 
561d8bdb0ebSBob Pearson 	/* See IBA A19.4.2 */
562592627ccSBob Pearson 	if (unlikely(page_offset & 0x7)) {
563*5dfd5a88SLi Zhijian 		rxe_dbg_mr(mr, "misaligned address\n");
564d8bdb0ebSBob Pearson 		return RESPST_ERR_MISALIGNED_ATOMIC;
565d8bdb0ebSBob Pearson 	}
566d8bdb0ebSBob Pearson 
567592627ccSBob Pearson 	va = kmap_local_page(page);
568592627ccSBob Pearson 
569d8bdb0ebSBob Pearson 	/* Do atomic write after all prior operations have completed */
570592627ccSBob Pearson 	smp_store_release(&va[page_offset >> 3], value);
571592627ccSBob Pearson 
572592627ccSBob Pearson 	kunmap_local(va);
573d8bdb0ebSBob Pearson 
574d8bdb0ebSBob Pearson 	return 0;
575d8bdb0ebSBob Pearson }
576d8bdb0ebSBob Pearson #else
rxe_mr_do_atomic_write(struct rxe_mr * mr,u64 iova,u64 value)577d8bdb0ebSBob Pearson int rxe_mr_do_atomic_write(struct rxe_mr *mr, u64 iova, u64 value)
578d8bdb0ebSBob Pearson {
579d8bdb0ebSBob Pearson 	return RESPST_ERR_UNSUPPORTED_OPCODE;
580d8bdb0ebSBob Pearson }
581d8bdb0ebSBob Pearson #endif
582d8bdb0ebSBob Pearson 
advance_dma_data(struct rxe_dma_info * dma,unsigned int length)5838700e3e7SMoni Shoua int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
5848700e3e7SMoni Shoua {
5858700e3e7SMoni Shoua 	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
5868700e3e7SMoni Shoua 	int			offset	= dma->sge_offset;
5878700e3e7SMoni Shoua 	int			resid	= dma->resid;
5888700e3e7SMoni Shoua 
5898700e3e7SMoni Shoua 	while (length) {
5908700e3e7SMoni Shoua 		unsigned int bytes;
5918700e3e7SMoni Shoua 
5928700e3e7SMoni Shoua 		if (offset >= sge->length) {
5938700e3e7SMoni Shoua 			sge++;
5948700e3e7SMoni Shoua 			dma->cur_sge++;
5958700e3e7SMoni Shoua 			offset = 0;
5968700e3e7SMoni Shoua 			if (dma->cur_sge >= dma->num_sge)
5978700e3e7SMoni Shoua 				return -ENOSPC;
5988700e3e7SMoni Shoua 		}
5998700e3e7SMoni Shoua 
6008700e3e7SMoni Shoua 		bytes = length;
6018700e3e7SMoni Shoua 
6028700e3e7SMoni Shoua 		if (bytes > sge->length - offset)
6038700e3e7SMoni Shoua 			bytes = sge->length - offset;
6048700e3e7SMoni Shoua 
6058700e3e7SMoni Shoua 		offset	+= bytes;
6068700e3e7SMoni Shoua 		resid	-= bytes;
6078700e3e7SMoni Shoua 		length	-= bytes;
6088700e3e7SMoni Shoua 	}
6098700e3e7SMoni Shoua 
6108700e3e7SMoni Shoua 	dma->sge_offset = offset;
6118700e3e7SMoni Shoua 	dma->resid	= resid;
6128700e3e7SMoni Shoua 
6138700e3e7SMoni Shoua 	return 0;
6148700e3e7SMoni Shoua }
6158700e3e7SMoni Shoua 
/* Find the MR for key and check that it may be used.
 *
 * The MR is fetched from the device's MR pool by index (key >> 8).
 * It is accepted only if the key matches the lkey (RXE_LOOKUP_LOCAL)
 * or rkey (RXE_LOOKUP_REMOTE), it belongs to pd, every bit of access
 * is set in mr->access, and it is in the VALID state.
 *
 * Returns the MR with the reference taken by rxe_pool_get_index()
 * still held, or NULL if there is no such MR or a check fails (the
 * reference is dropped in that case).
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;
	struct rxe_mr *mr;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (type == RXE_LOOKUP_LOCAL && mr->lkey != key)
		goto reject;

	if (type == RXE_LOOKUP_REMOTE && mr->rkey != key)
		goto reject;

	if (mr_pd(mr) != pd)
		goto reject;

	if ((access & mr->access) != access)
		goto reject;

	if (mr->state != RXE_MR_STATE_VALID)
		goto reject;

	return mr;

reject:
	rxe_put(mr);
	return NULL;
}
6373902b429SBob Pearson 
/* Invalidate the fast-reg MR identified by key, moving it to the
 * FREE state.
 *
 * key is compared against mr->rkey if the MR has any
 * RXE_ACCESS_REMOTE bit set in mr->access, otherwise against
 * mr->lkey.
 *
 * Returns 0 on success, or -EINVAL if no MR exists for the key, the
 * key does not match, the MR still has memory windows bound to it,
 * or the MR is not of type IB_MR_TYPE_MEM_REG.
 */
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	u32 mr_key;
	int ret = -EINVAL;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		rxe_dbg_qp(qp, "No MR for key %#x\n", key);
		return -EINVAL;
	}

	mr_key = (mr->access & RXE_ACCESS_REMOTE) ? mr->rkey : mr->lkey;
	if (key != mr_key) {
		rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
			key, mr_key);
		goto out_put;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n");
		goto out_put;
	}

	if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) {
		rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type);
		goto out_put;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

out_put:
	/* drop the reference taken by rxe_pool_get_index() */
	rxe_put(mr);
	return ret;
}
6803902b429SBob Pearson 
68100134533SBob Pearson /* user can (re)register fast MR by executing a REG_MR WQE.
68200134533SBob Pearson  * user is expected to hold a reference on the ib mr until the
68300134533SBob Pearson  * WQE completes.
68400134533SBob Pearson  * Once a fast MR is created this is the only way to change the
68500134533SBob Pearson  * private keys. It is the responsibility of the user to maintain
68600134533SBob Pearson  * the ib mr keys in sync with rxe mr keys.
68700134533SBob Pearson  */
rxe_reg_fast_mr(struct rxe_qp * qp,struct rxe_send_wqe * wqe)68800134533SBob Pearson int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
68900134533SBob Pearson {
69000134533SBob Pearson 	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
6911e755506SLi Zhijian 	u32 key = wqe->wr.wr.reg.key;
69200134533SBob Pearson 	u32 access = wqe->wr.wr.reg.access;
69300134533SBob Pearson 
69400134533SBob Pearson 	/* user can only register MR in free state */
69500134533SBob Pearson 	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
6962778b72bSBob Pearson 		rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey);
69700134533SBob Pearson 		return -EINVAL;
69800134533SBob Pearson 	}
69900134533SBob Pearson 
70000134533SBob Pearson 	/* user can only register mr with qp in same protection domain */
70100134533SBob Pearson 	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
7022778b72bSBob Pearson 		rxe_dbg_mr(mr, "qp->pd and mr->pd don't match\n");
70300134533SBob Pearson 		return -EINVAL;
70400134533SBob Pearson 	}
70500134533SBob Pearson 
7061e755506SLi Zhijian 	/* user is only allowed to change key portion of l/rkey */
7071e755506SLi Zhijian 	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
7082778b72bSBob Pearson 		rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n",
7092778b72bSBob Pearson 			key, mr->lkey);
7101e755506SLi Zhijian 		return -EINVAL;
71100134533SBob Pearson 	}
71200134533SBob Pearson 
7131e755506SLi Zhijian 	mr->access = access;
7141e755506SLi Zhijian 	mr->lkey = key;
71586a3fb55SBob Pearson 	mr->rkey = key;
716954afc5aSDaisuke Matsuda 	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
7171e755506SLi Zhijian 	mr->state = RXE_MR_STATE_VALID;
71800134533SBob Pearson 
71900134533SBob Pearson 	return 0;
72000134533SBob Pearson }
72100134533SBob Pearson 
rxe_mr_cleanup(struct rxe_pool_elem * elem)72202827b67SBob Pearson void rxe_mr_cleanup(struct rxe_pool_elem *elem)
7233902b429SBob Pearson {
72402827b67SBob Pearson 	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
7253902b429SBob Pearson 
726cf403679SBob Pearson 	rxe_put(mr_pd(mr));
7273902b429SBob Pearson 	ib_umem_release(mr->umem);
7283902b429SBob Pearson 
729592627ccSBob Pearson 	if (mr->ibmr.type != IB_MR_TYPE_DMA)
730592627ccSBob Pearson 		xa_destroy(&mr->page_list);
7313902b429SBob Pearson }
732