// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
	static u32 key = 1;

	key = key << 1;

	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

	key &= 0xff;

	return key;
}
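
/*
 * For illustration: the byte returned above becomes the low byte of a
 * new lkey/rkey in rxe_mem_init() below, with the pool index in the
 * upper bits.  E.g. (hypothetical values) index 0x1234 and key 0xab
 * give lkey = (0x1234 << 8) | 0xab = 0x1234ab.
 */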

int mem_check_range(struct rxe_mem *mem, u64 iova, size_t length)
{
	switch (mem->type) {
	case RXE_MEM_TYPE_DMA:
		return 0;

	case RXE_MEM_TYPE_MR:
		if (iova < mem->iova ||
		    length > mem->length ||
		    iova > mem->iova + mem->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}
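
/*
 * Worked example of the RXE_MEM_TYPE_MR check above (hypothetical
 * values): with mem->iova = 0x1000 and mem->length = 0x2000 the region
 * covers [0x1000, 0x3000).  A request with iova = 0x2800 and
 * length = 0x900 passes the first two tests but fails the third, since
 * 0x2800 > 0x1000 + 0x2000 - 0x900 = 0x2700, so -EFAULT is returned.
 */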

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mem_init(int access, struct rxe_mem *mem)
{
	u32 lkey = mem->pelem.index << 8 | rxe_get_key();
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mem->ibmr.lkey		= lkey;
	mem->ibmr.rkey		= rkey;
	mem->state		= RXE_MEM_STATE_INVALID;
	mem->type		= RXE_MEM_TYPE_NONE;
	mem->map_shift		= ilog2(RXE_BUF_PER_MAP);
}

void rxe_mem_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mem *mem = container_of(arg, typeof(*mem), pelem);
	int i;

	ib_umem_release(mem->umem);

	if (mem->map) {
		for (i = 0; i < mem->num_map; i++)
			kfree(mem->map[i]);

		kfree(mem->map);
	}
}

static int rxe_mem_alloc(struct rxe_mem *mem, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mem->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mem->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mem->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mem->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mem->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mem->map_shift	= ilog2(RXE_BUF_PER_MAP);
	mem->map_mask	= RXE_BUF_PER_MAP - 1;

	mem->num_buf = num_buf;
	mem->num_map = num_map;
	mem->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mem->map[i]);

	kfree(mem->map);
err1:
	return -ENOMEM;
}
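
/*
 * The table built above is two levels deep: mem->map holds num_map
 * pointers, each to a struct rxe_map containing RXE_BUF_PER_MAP
 * struct rxe_phys_buf entries.  For example, if RXE_BUF_PER_MAP were
 * 256, registering 1000 pages would need num_map = (1000 + 255) / 256
 * = 4 maps and could address up to max_buf = 1024 buffers.
 */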

void rxe_mem_init_dma(struct rxe_pd *pd,
		      int access, struct rxe_mem *mem)
{
	rxe_mem_init(access, mem);

	mem->ibmr.pd		= &pd->ibpd;
	mem->access		= access;
	mem->state		= RXE_MEM_STATE_VALID;
	mem->type		= RXE_MEM_TYPE_DMA;
}

int rxe_mem_init_user(struct rxe_pd *pd, u64 start,
		      u64 length, u64 iova, int access, struct ib_udata *udata,
		      struct rxe_mem *mem)
{
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct sg_page_iter	sg_iter;
	int			num_buf;
	void			*vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("err %d from ib_umem_get\n",
			(int)PTR_ERR(umem));
		err = -EINVAL;
		goto err1;
	}

	mem->umem = umem;
	num_buf = ib_umem_num_pages(umem);

	rxe_mem_init(access, mem);

	err = rxe_mem_alloc(mem, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mem_alloc\n", err);
		ib_umem_release(umem);
		goto err1;
	}

	mem->page_shift		= PAGE_SHIFT;
	mem->page_mask		= PAGE_SIZE - 1;

	num_buf			= 0;
	map			= mem->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				ib_umem_release(umem);
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mem->ibmr.pd		= &pd->ibpd;
	mem->umem		= umem;
	mem->access		= access;
	mem->length		= length;
	mem->iova		= iova;
	mem->va			= start;
	mem->offset		= ib_umem_offset(umem);
	mem->state		= RXE_MEM_STATE_VALID;
	mem->type		= RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}
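
/*
 * Note: each pinned user page ends up as one struct rxe_phys_buf
 * recording the page's kernel virtual address (page_address()) and a
 * size of PAGE_SIZE, so iova lookups later resolve to ordinary kernel
 * pointers.
 */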

int rxe_mem_init_fast(struct rxe_pd *pd,
		      int max_pages, struct rxe_mem *mem)
{
	int err;

	rxe_mem_init(0, mem);

	/* In fastreg, we also set the rkey */
	mem->ibmr.rkey = mem->ibmr.lkey;

	err = rxe_mem_alloc(mem, max_pages);
	if (err)
		goto err1;

	mem->ibmr.pd		= &pd->ibpd;
	mem->max_buf		= max_pages;
	mem->state		= RXE_MEM_STATE_FREE;
	mem->type		= RXE_MEM_TYPE_MR;

	return 0;

err1:
	return err;
}

static void lookup_iova(
	struct rxe_mem	*mem,
	u64			iova,
	int			*m_out,
	int			*n_out,
	size_t			*offset_out)
{
	size_t			offset = iova - mem->iova + mem->offset;
	int			map_index;
	int			buf_index;
	u64			length;

	if (likely(mem->page_shift)) {
		*offset_out = offset & mem->page_mask;
		offset >>= mem->page_shift;
		*n_out = offset & mem->map_mask;
		*m_out = offset >> mem->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mem->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mem->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}
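
/*
 * Fast-path example (hypothetical parameters): with page_shift = 12,
 * page_mask = 0xfff, map_shift = 8 and map_mask = 0xff, an offset of
 * 0x123456 into the region decomposes as
 *	*offset_out = 0x123456 & 0xfff        = 0x456
 *	*n_out      = (0x123456 >> 12) & 0xff = 0x23
 *	*m_out      = (0x123456 >> 12) >> 8   = 0x1
 * i.e. byte 0x456 of buffer 0x23 in map 1.
 */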

void *iova_to_vaddr(struct rxe_mem *mem, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mem->state != RXE_MEM_STATE_VALID) {
		pr_warn("mem not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mem->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mem_check_range(mem, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mem, iova, &m, &n, &offset);

	if (offset + length > mem->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mem->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* Copy data from a range (vaddr, vaddr + length - 1) to or from
 * a mem object starting at iova. Compute the incremental crc32
 * if crcp is not NULL. The caller must hold a reference to mem.
 */
int rxe_mem_copy(struct rxe_mem *mem, u64 iova, void *addr, int length,
		 enum copy_direction dir, u32 *crcp)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;
	u32			crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mem->type == RXE_MEM_TYPE_DMA) {
		u8 *src, *dest;

		src  = (dir == to_mem_obj) ?
			addr : ((void *)(uintptr_t)iova);

		dest = (dir == to_mem_obj) ?
			((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mem->ibmr.device),
					*crcp, dest, length);

		return 0;
	}

	WARN_ON_ONCE(!mem->map);

	err = mem_check_range(mem, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mem, iova, &m, &i, &offset);

	map	= mem->map + m;
	buf	= map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va	= (u8 *)(uintptr_t)buf->addr + offset;
		src  = (dir == to_mem_obj) ? addr : va;
		dest = (dir == to_mem_obj) ? va : addr;

		bytes	= buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mem->ibmr.device),
					crc, dest, bytes);

		length	-= bytes;
		addr	+= bytes;

		offset	= 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}
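
/*
 * Usage sketch (illustrative only): copying an 8 byte payload into a
 * registered MR at iova looks like
 *	err = rxe_mem_copy(mem, iova, payload, 8, to_mem_obj, NULL);
 * With the opposite copy_direction the same call copies out of the MR
 * into payload, and a non-NULL crcp accumulates the crc32 of the
 * copied bytes into *crcp.
 */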

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum copy_direction	dir,
	u32			*crcp)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mem		*mem	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mem = lookup_mem(pd, access, sge->lkey, lookup_local);
		if (!mem) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mem) {
				rxe_drop_ref(mem);
				mem = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mem = lookup_mem(pd, access, sge->lkey,
						 lookup_local);
				if (!mem) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mem_copy(mem, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset	+= bytes;
			resid	-= bytes;
			length	-= bytes;
			addr	+= bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	if (mem)
		rxe_drop_ref(mem);

	return 0;

err2:
	if (mem)
		rxe_drop_ref(mem);
err1:
	return err;
}
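
/*
 * The loop above advances through the work request's sg list one sge
 * at a time: dma->cur_sge and dma->sge_offset record the current
 * position, a fresh MR reference is looked up whenever the walk moves
 * to the next sge, and dma->resid tracks how many bytes of the request
 * remain.
 */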

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset	+= bytes;
		resid	-= bytes;
		length	-= bytes;
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	return 0;
}

/* (1) find the mem (mr or mw) corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mem pd
 * (3) verify that the mem can support the requested access
 * (4) verify that mem state is valid
 */
struct rxe_mem *lookup_mem(struct rxe_pd *pd, int access, u32 key,
			   enum lookup_type type)
{
	struct rxe_mem *mem;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mem = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mem)
		return NULL;

	if (unlikely((type == lookup_local && mr_lkey(mem) != key) ||
		     (type == lookup_remote && mr_rkey(mem) != key) ||
		     mr_pd(mem) != pd ||
		     (access && !(access & mem->access)) ||
		     mem->state != RXE_MEM_STATE_VALID)) {
		rxe_drop_ref(mem);
		mem = NULL;
	}

	return mem;
}
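
/*
 * Key layout reminder: the pool index lives in the upper 24 bits of an
 * lkey/rkey (index = key >> 8) and the low byte is the variant from
 * rxe_get_key(), so a stale key with a matching index but a different
 * low byte still fails the mr_lkey()/mr_rkey() comparison above.
 */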