xref: /openbmc/linux/drivers/infiniband/sw/rxe/rxe_mr.c (revision f2d8e15b)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
4  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5  */
6 
7 #include "rxe.h"
8 #include "rxe_loc.h"
9 
10 /* Return a random 8 bit key value that is
11  * different than the last_key. Set last_key to -1
12  * if this is the first key for an MR or MW
13  */
14 u8 rxe_get_next_key(u32 last_key)
15 {
16 	u8 key;
17 
18 	do {
19 		get_random_bytes(&key, 1);
20 	} while (key == last_key);
21 
22 	return key;
23 }
24 
25 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
26 {
27 
28 
29 	switch (mr->type) {
30 	case IB_MR_TYPE_DMA:
31 		return 0;
32 
33 	case IB_MR_TYPE_USER:
34 	case IB_MR_TYPE_MEM_REG:
35 		if (iova < mr->iova || length > mr->length ||
36 		    iova > mr->iova + mr->length - length)
37 			return -EFAULT;
38 		return 0;
39 
40 	default:
41 		pr_warn("%s: mr type (%d) not supported\n",
42 			__func__, mr->type);
43 		return -EFAULT;
44 	}
45 }
46 
/* Union of the access flags that grant some form of remote access */
#define IB_ACCESS_REMOTE \
	(IB_ACCESS_REMOTE_READ | IB_ACCESS_REMOTE_WRITE | IB_ACCESS_REMOTE_ATOMIC)
50 
51 static void rxe_mr_init(int access, struct rxe_mr *mr)
52 {
53 	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
54 	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
55 
56 	/* set ibmr->l/rkey and also copy into private l/rkey
57 	 * for user MRs these will always be the same
58 	 * for cases where caller 'owns' the key portion
59 	 * they may be different until REG_MR WQE is executed.
60 	 */
61 	mr->lkey = mr->ibmr.lkey = lkey;
62 	mr->rkey = mr->ibmr.rkey = rkey;
63 
64 	mr->state = RXE_MR_STATE_INVALID;
65 	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
66 }
67 
68 static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
69 {
70 	int i;
71 	int num_map;
72 	struct rxe_map **map = mr->map;
73 
74 	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
75 
76 	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
77 	if (!mr->map)
78 		goto err1;
79 
80 	for (i = 0; i < num_map; i++) {
81 		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
82 		if (!mr->map[i])
83 			goto err2;
84 	}
85 
86 	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
87 
88 	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
89 	mr->map_mask = RXE_BUF_PER_MAP - 1;
90 
91 	mr->num_buf = num_buf;
92 	mr->num_map = num_map;
93 	mr->max_buf = num_map * RXE_BUF_PER_MAP;
94 
95 	return 0;
96 
97 err2:
98 	for (i--; i >= 0; i--)
99 		kfree(mr->map[i]);
100 
101 	kfree(mr->map);
102 err1:
103 	return -ENOMEM;
104 }
105 
106 void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
107 {
108 	rxe_mr_init(access, mr);
109 
110 	mr->ibmr.pd = &pd->ibpd;
111 	mr->access = access;
112 	mr->state = RXE_MR_STATE_VALID;
113 	mr->type = IB_MR_TYPE_DMA;
114 }
115 
116 int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
117 		     int access, struct rxe_mr *mr)
118 {
119 	struct rxe_map		**map;
120 	struct rxe_phys_buf	*buf = NULL;
121 	struct ib_umem		*umem;
122 	struct sg_page_iter	sg_iter;
123 	int			num_buf;
124 	void			*vaddr;
125 	int err;
126 	int i;
127 
128 	umem = ib_umem_get(pd->ibpd.device, start, length, access);
129 	if (IS_ERR(umem)) {
130 		pr_warn("%s: Unable to pin memory region err = %d\n",
131 			__func__, (int)PTR_ERR(umem));
132 		err = PTR_ERR(umem);
133 		goto err_out;
134 	}
135 
136 	num_buf = ib_umem_num_pages(umem);
137 
138 	rxe_mr_init(access, mr);
139 
140 	err = rxe_mr_alloc(mr, num_buf);
141 	if (err) {
142 		pr_warn("%s: Unable to allocate memory for map\n",
143 				__func__);
144 		goto err_release_umem;
145 	}
146 
147 	mr->page_shift = PAGE_SHIFT;
148 	mr->page_mask = PAGE_SIZE - 1;
149 
150 	num_buf			= 0;
151 	map = mr->map;
152 	if (length > 0) {
153 		buf = map[0]->buf;
154 
155 		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
156 			if (num_buf >= RXE_BUF_PER_MAP) {
157 				map++;
158 				buf = map[0]->buf;
159 				num_buf = 0;
160 			}
161 
162 			vaddr = page_address(sg_page_iter_page(&sg_iter));
163 			if (!vaddr) {
164 				pr_warn("%s: Unable to get virtual address\n",
165 						__func__);
166 				err = -ENOMEM;
167 				goto err_cleanup_map;
168 			}
169 
170 			buf->addr = (uintptr_t)vaddr;
171 			buf->size = PAGE_SIZE;
172 			num_buf++;
173 			buf++;
174 
175 		}
176 	}
177 
178 	mr->ibmr.pd = &pd->ibpd;
179 	mr->umem = umem;
180 	mr->access = access;
181 	mr->length = length;
182 	mr->iova = iova;
183 	mr->va = start;
184 	mr->offset = ib_umem_offset(umem);
185 	mr->state = RXE_MR_STATE_VALID;
186 	mr->type = IB_MR_TYPE_USER;
187 
188 	return 0;
189 
190 err_cleanup_map:
191 	for (i = 0; i < mr->num_map; i++)
192 		kfree(mr->map[i]);
193 	kfree(mr->map);
194 err_release_umem:
195 	ib_umem_release(umem);
196 err_out:
197 	return err;
198 }
199 
200 int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
201 {
202 	int err;
203 
204 	/* always allow remote access for FMRs */
205 	rxe_mr_init(IB_ACCESS_REMOTE, mr);
206 
207 	err = rxe_mr_alloc(mr, max_pages);
208 	if (err)
209 		goto err1;
210 
211 	mr->ibmr.pd = &pd->ibpd;
212 	mr->max_buf = max_pages;
213 	mr->state = RXE_MR_STATE_FREE;
214 	mr->type = IB_MR_TYPE_MEM_REG;
215 
216 	return 0;
217 
218 err1:
219 	return err;
220 }
221 
222 static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
223 			size_t *offset_out)
224 {
225 	size_t offset = iova - mr->iova + mr->offset;
226 	int			map_index;
227 	int			buf_index;
228 	u64			length;
229 
230 	if (likely(mr->page_shift)) {
231 		*offset_out = offset & mr->page_mask;
232 		offset >>= mr->page_shift;
233 		*n_out = offset & mr->map_mask;
234 		*m_out = offset >> mr->map_shift;
235 	} else {
236 		map_index = 0;
237 		buf_index = 0;
238 
239 		length = mr->map[map_index]->buf[buf_index].size;
240 
241 		while (offset >= length) {
242 			offset -= length;
243 			buf_index++;
244 
245 			if (buf_index == RXE_BUF_PER_MAP) {
246 				map_index++;
247 				buf_index = 0;
248 			}
249 			length = mr->map[map_index]->buf[buf_index].size;
250 		}
251 
252 		*m_out = map_index;
253 		*n_out = buf_index;
254 		*offset_out = offset;
255 	}
256 }
257 
258 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
259 {
260 	size_t offset;
261 	int m, n;
262 	void *addr;
263 
264 	if (mr->state != RXE_MR_STATE_VALID) {
265 		pr_warn("mr not in valid state\n");
266 		addr = NULL;
267 		goto out;
268 	}
269 
270 	if (!mr->map) {
271 		addr = (void *)(uintptr_t)iova;
272 		goto out;
273 	}
274 
275 	if (mr_check_range(mr, iova, length)) {
276 		pr_warn("range violation\n");
277 		addr = NULL;
278 		goto out;
279 	}
280 
281 	lookup_iova(mr, iova, &m, &n, &offset);
282 
283 	if (offset + length > mr->map[m]->buf[n].size) {
284 		pr_warn("crosses page boundary\n");
285 		addr = NULL;
286 		goto out;
287 	}
288 
289 	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
290 
291 out:
292 	return addr;
293 }
294 
295 /* copy data from a range (vaddr, vaddr+length-1) to or from
296  * a mr object starting at iova.
297  */
298 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
299 		enum rxe_mr_copy_dir dir)
300 {
301 	int			err;
302 	int			bytes;
303 	u8			*va;
304 	struct rxe_map		**map;
305 	struct rxe_phys_buf	*buf;
306 	int			m;
307 	int			i;
308 	size_t			offset;
309 
310 	if (length == 0)
311 		return 0;
312 
313 	if (mr->type == IB_MR_TYPE_DMA) {
314 		u8 *src, *dest;
315 
316 		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
317 
318 		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;
319 
320 		memcpy(dest, src, length);
321 
322 		return 0;
323 	}
324 
325 	WARN_ON_ONCE(!mr->map);
326 
327 	err = mr_check_range(mr, iova, length);
328 	if (err) {
329 		err = -EFAULT;
330 		goto err1;
331 	}
332 
333 	lookup_iova(mr, iova, &m, &i, &offset);
334 
335 	map = mr->map + m;
336 	buf	= map[0]->buf + i;
337 
338 	while (length > 0) {
339 		u8 *src, *dest;
340 
341 		va	= (u8 *)(uintptr_t)buf->addr + offset;
342 		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
343 		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;
344 
345 		bytes	= buf->size - offset;
346 
347 		if (bytes > length)
348 			bytes = length;
349 
350 		memcpy(dest, src, bytes);
351 
352 		length	-= bytes;
353 		addr	+= bytes;
354 
355 		offset	= 0;
356 		buf++;
357 		i++;
358 
359 		if (i == RXE_BUF_PER_MAP) {
360 			i = 0;
361 			map++;
362 			buf = map[0]->buf;
363 		}
364 	}
365 
366 	return 0;
367 
368 err1:
369 	return err;
370 }
371 
372 /* copy data in or out of a wqe, i.e. sg list
373  * under the control of a dma descriptor
374  */
375 int copy_data(
376 	struct rxe_pd		*pd,
377 	int			access,
378 	struct rxe_dma_info	*dma,
379 	void			*addr,
380 	int			length,
381 	enum rxe_mr_copy_dir	dir)
382 {
383 	int			bytes;
384 	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
385 	int			offset	= dma->sge_offset;
386 	int			resid	= dma->resid;
387 	struct rxe_mr		*mr	= NULL;
388 	u64			iova;
389 	int			err;
390 
391 	if (length == 0)
392 		return 0;
393 
394 	if (length > resid) {
395 		err = -EINVAL;
396 		goto err2;
397 	}
398 
399 	if (sge->length && (offset < sge->length)) {
400 		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
401 		if (!mr) {
402 			err = -EINVAL;
403 			goto err1;
404 		}
405 	}
406 
407 	while (length > 0) {
408 		bytes = length;
409 
410 		if (offset >= sge->length) {
411 			if (mr) {
412 				rxe_put(mr);
413 				mr = NULL;
414 			}
415 			sge++;
416 			dma->cur_sge++;
417 			offset = 0;
418 
419 			if (dma->cur_sge >= dma->num_sge) {
420 				err = -ENOSPC;
421 				goto err2;
422 			}
423 
424 			if (sge->length) {
425 				mr = lookup_mr(pd, access, sge->lkey,
426 					       RXE_LOOKUP_LOCAL);
427 				if (!mr) {
428 					err = -EINVAL;
429 					goto err1;
430 				}
431 			} else {
432 				continue;
433 			}
434 		}
435 
436 		if (bytes > sge->length - offset)
437 			bytes = sge->length - offset;
438 
439 		if (bytes > 0) {
440 			iova = sge->addr + offset;
441 
442 			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
443 			if (err)
444 				goto err2;
445 
446 			offset	+= bytes;
447 			resid	-= bytes;
448 			length	-= bytes;
449 			addr	+= bytes;
450 		}
451 	}
452 
453 	dma->sge_offset = offset;
454 	dma->resid	= resid;
455 
456 	if (mr)
457 		rxe_put(mr);
458 
459 	return 0;
460 
461 err2:
462 	if (mr)
463 		rxe_put(mr);
464 err1:
465 	return err;
466 }
467 
468 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
469 {
470 	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
471 	int			offset	= dma->sge_offset;
472 	int			resid	= dma->resid;
473 
474 	while (length) {
475 		unsigned int bytes;
476 
477 		if (offset >= sge->length) {
478 			sge++;
479 			dma->cur_sge++;
480 			offset = 0;
481 			if (dma->cur_sge >= dma->num_sge)
482 				return -ENOSPC;
483 		}
484 
485 		bytes = length;
486 
487 		if (bytes > sge->length - offset)
488 			bytes = sge->length - offset;
489 
490 		offset	+= bytes;
491 		resid	-= bytes;
492 		length	-= bytes;
493 	}
494 
495 	dma->sge_offset = offset;
496 	dma->resid	= resid;
497 
498 	return 0;
499 }
500 
501 /* (1) find the mr corresponding to lkey/rkey
502  *     depending on lookup_type
503  * (2) verify that the (qp) pd matches the mr pd
504  * (3) verify that the mr can support the requested access
505  * (4) verify that mr state is valid
506  */
507 struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
508 			 enum rxe_mr_lookup_type type)
509 {
510 	struct rxe_mr *mr;
511 	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
512 	int index = key >> 8;
513 
514 	mr = rxe_pool_get_index(&rxe->mr_pool, index);
515 	if (!mr)
516 		return NULL;
517 
518 	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
519 		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
520 		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
521 		     mr->state != RXE_MR_STATE_VALID)) {
522 		rxe_put(mr);
523 		mr = NULL;
524 	}
525 
526 	return mr;
527 }
528 
529 int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
530 {
531 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
532 	struct rxe_mr *mr;
533 	int ret;
534 
535 	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
536 	if (!mr) {
537 		pr_err("%s: No MR for key %#x\n", __func__, key);
538 		ret = -EINVAL;
539 		goto err;
540 	}
541 
542 	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
543 		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
544 			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
545 		ret = -EINVAL;
546 		goto err_drop_ref;
547 	}
548 
549 	if (atomic_read(&mr->num_mw) > 0) {
550 		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
551 			__func__);
552 		ret = -EINVAL;
553 		goto err_drop_ref;
554 	}
555 
556 	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
557 		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
558 		ret = -EINVAL;
559 		goto err_drop_ref;
560 	}
561 
562 	mr->state = RXE_MR_STATE_FREE;
563 	ret = 0;
564 
565 err_drop_ref:
566 	rxe_put(mr);
567 err:
568 	return ret;
569 }
570 
571 /* user can (re)register fast MR by executing a REG_MR WQE.
572  * user is expected to hold a reference on the ib mr until the
573  * WQE completes.
574  * Once a fast MR is created this is the only way to change the
575  * private keys. It is the responsibility of the user to maintain
576  * the ib mr keys in sync with rxe mr keys.
577  */
578 int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
579 {
580 	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
581 	u32 key = wqe->wr.wr.reg.key;
582 	u32 access = wqe->wr.wr.reg.access;
583 
584 	/* user can only register MR in free state */
585 	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
586 		pr_warn("%s: mr->lkey = 0x%x not free\n",
587 			__func__, mr->lkey);
588 		return -EINVAL;
589 	}
590 
591 	/* user can only register mr with qp in same protection domain */
592 	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
593 		pr_warn("%s: qp->pd and mr->pd don't match\n",
594 			__func__);
595 		return -EINVAL;
596 	}
597 
598 	/* user is only allowed to change key portion of l/rkey */
599 	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
600 		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
601 			__func__, key, mr->lkey);
602 		return -EINVAL;
603 	}
604 
605 	mr->access = access;
606 	mr->lkey = key;
607 	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
608 	mr->iova = wqe->wr.wr.reg.mr->iova;
609 	mr->state = RXE_MR_STATE_VALID;
610 
611 	return 0;
612 }
613 
614 int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
615 {
616 	struct rxe_mr *mr = to_rmr(ibmr);
617 
618 	/* See IBA 10.6.7.2.6 */
619 	if (atomic_read(&mr->num_mw) > 0)
620 		return -EINVAL;
621 
622 	rxe_cleanup(mr);
623 
624 	return 0;
625 }
626 
627 void rxe_mr_cleanup(struct rxe_pool_elem *elem)
628 {
629 	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
630 	int i;
631 
632 	rxe_put(mr_pd(mr));
633 	ib_umem_release(mr->umem);
634 
635 	if (mr->map) {
636 		for (i = 0; i < mr->num_map; i++)
637 			kfree(mr->map[i]);
638 
639 		kfree(mr->map);
640 	}
641 }
642