xref: /openbmc/linux/drivers/infiniband/sw/rxe/rxe_mr.c (revision e3211e41)
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/*
 * lfsr (linear feedback shift register) with period 255
 */
static u8 rxe_get_key(void)
{
	static u32 key = 1;

	key = key << 1;

	key |= (0 != (key & 0x100)) ^ (0 != (key & 0x10))
		^ (0 != (key & 0x80)) ^ (0 != (key & 0x40));

	key &= 0xff;

	return key;
}

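/*
 * Check that the byte range [iova, iova + length) lies entirely inside
 * the memory region. DMA memory regions cover the whole address space,
 * so any range is accepted for them.
 */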
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

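/*
 * An lkey/rkey is built from the MR's pool index in the upper 24 bits
 * and the 8-bit lfsr value in the lowest byte, so lookup_mr() can
 * recover the pool index with (key >> 8), while the changing low byte
 * makes an old, stale key unlikely to still match. The rkey is only
 * populated when remote access was requested.
 */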
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_key();
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

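/*
 * Pool cleanup callback, invoked when the last reference to the MR is
 * dropped. ib_umem_release() tolerates a NULL umem, so this is safe for
 * MR types that never pinned user memory.
 */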
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}

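/*
 * Allocate the two-level page map: an array of num_map pointers, each
 * to a struct rxe_map holding RXE_BUF_PER_MAP physical-buffer slots.
 * Because RXE_BUF_PER_MAP is a power of two, a buffer index splits into
 * a map index (high bits, via map_shift) and a slot index (low bits,
 * via map_mask).
 */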
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

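/*
 * A DMA MR has no page map; iova_to_vaddr() and rxe_mr_copy() treat its
 * iovas as kernel virtual addresses directly.
 */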
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

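/*
 * Register a user memory region: pin the user pages with ib_umem_get()
 * and record the kernel virtual address and size of each page in the
 * MR's page map.
 */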
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct ib_udata *udata, struct rxe_mr *mr)
{
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct sg_page_iter	sg_iter;
	int			num_buf;
	void			*vaddr;
	int err;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("err %d from ib_umem_get\n",
			(int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err1;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mr_alloc\n", err);
		ib_umem_release(umem);
		goto err1;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				ib_umem_release(umem);
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	/* attach umem only on success so that rxe_mr_cleanup() cannot
	 * release it a second time after an error path already has
	 */
	mr->umem = umem;
	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

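/*
 * Set up an MR for fast registration (IB_WR_REG_MR). No pages are
 * pinned here; the page map is sized for max_pages and the MR stays in
 * RXE_MR_STATE_FREE until a reg_mr work request binds it.
 */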
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

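/*
 * Translate an iova into a (map index, buffer index, offset) triple.
 * With uniform PAGE_SIZE buffers (page_shift != 0) this is pure bit
 * arithmetic. For illustration, assuming 4K pages and 256 buffers per
 * map, byte offset 0x101123 into the region is page 0x101 = 257, i.e.
 * map 1, slot 1, with 0x123 left as the in-page offset. Otherwise the
 * buffer sizes may vary and the map must be walked linearly.
 */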
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

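/*
 * Return the kernel virtual address backing iova, or NULL if the MR is
 * not valid, the range check fails, or the requested length would cross
 * a buffer (page) boundary. DMA MRs (no map) pass the iova through
 * unchanged.
 */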
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * an mr object starting at iova. Compute the incremental crc32
 * if crcp is not NULL. The caller must hold a reference to mr.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum copy_direction dir, u32 *crcp)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;
	u32			crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == to_mr_obj) ? addr : ((void *)(uintptr_t)iova);
		dest = (dir == to_mr_obj) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp, dest,
					  length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == to_mr_obj) ? addr : va;
		dest = (dir == to_mr_obj) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
					bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

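/*
 * A struct rxe_dma_info tracks progress through a work request's sg
 * list as a cursor: cur_sge indexes the current sge, sge_offset is the
 * byte position inside it, and resid counts the bytes left in the whole
 * transfer. copy_data() and advance_dma_data() both move this cursor.
 */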
/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
	      void *addr, int length, enum copy_direction dir, u32 *crcp)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mr		*mr	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, lookup_local);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       lookup_local);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}

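/*
 * Advance the dma descriptor cursor by length bytes without copying
 * anything, mirroring the sge-walking bookkeeping in copy_data().
 */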
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

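/*
 * On success the MR is returned with a reference taken by
 * rxe_pool_get_index(); the caller is responsible for dropping it with
 * rxe_drop_ref().
 */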
/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == lookup_local && mr_lkey(mr) != key) ||
		     (type == lookup_remote && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}