xref: /openbmc/linux/drivers/infiniband/sw/rxe/rxe_mr.c (revision 7e043a80)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /*
3  * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
4  * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
5  */
6 
7 #include "rxe.h"
8 #include "rxe_loc.h"
9 
10 /* Return a random 8 bit key value that is
11  * different than the last_key. Set last_key to -1
12  * if this is the first key for an MR or MW
13  */
14 u8 rxe_get_next_key(u32 last_key)
15 {
16 	u8 key;
17 
18 	do {
19 		get_random_bytes(&key, 1);
20 	} while (key == last_key);
21 
22 	return key;
23 }
24 
25 int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
26 {
27 
28 
29 	switch (mr->type) {
30 	case IB_MR_TYPE_DMA:
31 		return 0;
32 
33 	case IB_MR_TYPE_USER:
34 	case IB_MR_TYPE_MEM_REG:
35 		if (iova < mr->ibmr.iova || length > mr->ibmr.length ||
36 		    iova > mr->ibmr.iova + mr->ibmr.length - length)
37 			return -EFAULT;
38 		return 0;
39 
40 	default:
41 		pr_warn("%s: mr type (%d) not supported\n",
42 			__func__, mr->type);
43 		return -EFAULT;
44 	}
45 }
46 
47 #define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
48 				| IB_ACCESS_REMOTE_WRITE	\
49 				| IB_ACCESS_REMOTE_ATOMIC)
50 
51 static void rxe_mr_init(int access, struct rxe_mr *mr)
52 {
53 	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
54 	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;
55 
56 	/* set ibmr->l/rkey and also copy into private l/rkey
57 	 * for user MRs these will always be the same
58 	 * for cases where caller 'owns' the key portion
59 	 * they may be different until REG_MR WQE is executed.
60 	 */
61 	mr->lkey = mr->ibmr.lkey = lkey;
62 	mr->rkey = mr->ibmr.rkey = rkey;
63 
64 	mr->state = RXE_MR_STATE_INVALID;
65 	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
66 }
67 
68 static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
69 {
70 	int i;
71 	int num_map;
72 	struct rxe_map **map = mr->map;
73 
74 	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;
75 
76 	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
77 	if (!mr->map)
78 		goto err1;
79 
80 	for (i = 0; i < num_map; i++) {
81 		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
82 		if (!mr->map[i])
83 			goto err2;
84 	}
85 
86 	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
87 
88 	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
89 	mr->map_mask = RXE_BUF_PER_MAP - 1;
90 
91 	mr->num_buf = num_buf;
92 	mr->num_map = num_map;
93 	mr->max_buf = num_map * RXE_BUF_PER_MAP;
94 
95 	return 0;
96 
97 err2:
98 	for (i--; i >= 0; i--)
99 		kfree(mr->map[i]);
100 
101 	kfree(mr->map);
102 err1:
103 	return -ENOMEM;
104 }
105 
106 void rxe_mr_init_dma(int access, struct rxe_mr *mr)
107 {
108 	rxe_mr_init(access, mr);
109 
110 	mr->access = access;
111 	mr->state = RXE_MR_STATE_VALID;
112 	mr->type = IB_MR_TYPE_DMA;
113 }
114 
115 int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
116 		     int access, struct rxe_mr *mr)
117 {
118 	struct rxe_map		**map;
119 	struct rxe_phys_buf	*buf = NULL;
120 	struct ib_umem		*umem;
121 	struct sg_page_iter	sg_iter;
122 	int			num_buf;
123 	void			*vaddr;
124 	int err;
125 	int i;
126 
127 	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
128 	if (IS_ERR(umem)) {
129 		pr_warn("%s: Unable to pin memory region err = %d\n",
130 			__func__, (int)PTR_ERR(umem));
131 		err = PTR_ERR(umem);
132 		goto err_out;
133 	}
134 
135 	num_buf = ib_umem_num_pages(umem);
136 
137 	rxe_mr_init(access, mr);
138 
139 	err = rxe_mr_alloc(mr, num_buf);
140 	if (err) {
141 		pr_warn("%s: Unable to allocate memory for map\n",
142 				__func__);
143 		goto err_release_umem;
144 	}
145 
146 	mr->page_shift = PAGE_SHIFT;
147 	mr->page_mask = PAGE_SIZE - 1;
148 
149 	num_buf			= 0;
150 	map = mr->map;
151 	if (length > 0) {
152 		buf = map[0]->buf;
153 
154 		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
155 			if (num_buf >= RXE_BUF_PER_MAP) {
156 				map++;
157 				buf = map[0]->buf;
158 				num_buf = 0;
159 			}
160 
161 			vaddr = page_address(sg_page_iter_page(&sg_iter));
162 			if (!vaddr) {
163 				pr_warn("%s: Unable to get virtual address\n",
164 						__func__);
165 				err = -ENOMEM;
166 				goto err_cleanup_map;
167 			}
168 
169 			buf->addr = (uintptr_t)vaddr;
170 			buf->size = PAGE_SIZE;
171 			num_buf++;
172 			buf++;
173 
174 		}
175 	}
176 
177 	mr->umem = umem;
178 	mr->access = access;
179 	mr->offset = ib_umem_offset(umem);
180 	mr->state = RXE_MR_STATE_VALID;
181 	mr->type = IB_MR_TYPE_USER;
182 
183 	return 0;
184 
185 err_cleanup_map:
186 	for (i = 0; i < mr->num_map; i++)
187 		kfree(mr->map[i]);
188 	kfree(mr->map);
189 err_release_umem:
190 	ib_umem_release(umem);
191 err_out:
192 	return err;
193 }
194 
195 int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
196 {
197 	int err;
198 
199 	/* always allow remote access for FMRs */
200 	rxe_mr_init(IB_ACCESS_REMOTE, mr);
201 
202 	err = rxe_mr_alloc(mr, max_pages);
203 	if (err)
204 		goto err1;
205 
206 	mr->max_buf = max_pages;
207 	mr->state = RXE_MR_STATE_FREE;
208 	mr->type = IB_MR_TYPE_MEM_REG;
209 
210 	return 0;
211 
212 err1:
213 	return err;
214 }
215 
216 static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
217 			size_t *offset_out)
218 {
219 	size_t offset = iova - mr->ibmr.iova + mr->offset;
220 	int			map_index;
221 	int			buf_index;
222 	u64			length;
223 
224 	if (likely(mr->page_shift)) {
225 		*offset_out = offset & mr->page_mask;
226 		offset >>= mr->page_shift;
227 		*n_out = offset & mr->map_mask;
228 		*m_out = offset >> mr->map_shift;
229 	} else {
230 		map_index = 0;
231 		buf_index = 0;
232 
233 		length = mr->map[map_index]->buf[buf_index].size;
234 
235 		while (offset >= length) {
236 			offset -= length;
237 			buf_index++;
238 
239 			if (buf_index == RXE_BUF_PER_MAP) {
240 				map_index++;
241 				buf_index = 0;
242 			}
243 			length = mr->map[map_index]->buf[buf_index].size;
244 		}
245 
246 		*m_out = map_index;
247 		*n_out = buf_index;
248 		*offset_out = offset;
249 	}
250 }
251 
252 void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
253 {
254 	size_t offset;
255 	int m, n;
256 	void *addr;
257 
258 	if (mr->state != RXE_MR_STATE_VALID) {
259 		pr_warn("mr not in valid state\n");
260 		addr = NULL;
261 		goto out;
262 	}
263 
264 	if (!mr->map) {
265 		addr = (void *)(uintptr_t)iova;
266 		goto out;
267 	}
268 
269 	if (mr_check_range(mr, iova, length)) {
270 		pr_warn("range violation\n");
271 		addr = NULL;
272 		goto out;
273 	}
274 
275 	lookup_iova(mr, iova, &m, &n, &offset);
276 
277 	if (offset + length > mr->map[m]->buf[n].size) {
278 		pr_warn("crosses page boundary\n");
279 		addr = NULL;
280 		goto out;
281 	}
282 
283 	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;
284 
285 out:
286 	return addr;
287 }
288 
289 /* copy data from a range (vaddr, vaddr+length-1) to or from
290  * a mr object starting at iova.
291  */
292 int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
293 		enum rxe_mr_copy_dir dir)
294 {
295 	int			err;
296 	int			bytes;
297 	u8			*va;
298 	struct rxe_map		**map;
299 	struct rxe_phys_buf	*buf;
300 	int			m;
301 	int			i;
302 	size_t			offset;
303 
304 	if (length == 0)
305 		return 0;
306 
307 	if (mr->type == IB_MR_TYPE_DMA) {
308 		u8 *src, *dest;
309 
310 		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);
311 
312 		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;
313 
314 		memcpy(dest, src, length);
315 
316 		return 0;
317 	}
318 
319 	WARN_ON_ONCE(!mr->map);
320 
321 	err = mr_check_range(mr, iova, length);
322 	if (err) {
323 		err = -EFAULT;
324 		goto err1;
325 	}
326 
327 	lookup_iova(mr, iova, &m, &i, &offset);
328 
329 	map = mr->map + m;
330 	buf	= map[0]->buf + i;
331 
332 	while (length > 0) {
333 		u8 *src, *dest;
334 
335 		va	= (u8 *)(uintptr_t)buf->addr + offset;
336 		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
337 		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;
338 
339 		bytes	= buf->size - offset;
340 
341 		if (bytes > length)
342 			bytes = length;
343 
344 		memcpy(dest, src, bytes);
345 
346 		length	-= bytes;
347 		addr	+= bytes;
348 
349 		offset	= 0;
350 		buf++;
351 		i++;
352 
353 		if (i == RXE_BUF_PER_MAP) {
354 			i = 0;
355 			map++;
356 			buf = map[0]->buf;
357 		}
358 	}
359 
360 	return 0;
361 
362 err1:
363 	return err;
364 }
365 
366 /* copy data in or out of a wqe, i.e. sg list
367  * under the control of a dma descriptor
368  */
369 int copy_data(
370 	struct rxe_pd		*pd,
371 	int			access,
372 	struct rxe_dma_info	*dma,
373 	void			*addr,
374 	int			length,
375 	enum rxe_mr_copy_dir	dir)
376 {
377 	int			bytes;
378 	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
379 	int			offset	= dma->sge_offset;
380 	int			resid	= dma->resid;
381 	struct rxe_mr		*mr	= NULL;
382 	u64			iova;
383 	int			err;
384 
385 	if (length == 0)
386 		return 0;
387 
388 	if (length > resid) {
389 		err = -EINVAL;
390 		goto err2;
391 	}
392 
393 	if (sge->length && (offset < sge->length)) {
394 		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
395 		if (!mr) {
396 			err = -EINVAL;
397 			goto err1;
398 		}
399 	}
400 
401 	while (length > 0) {
402 		bytes = length;
403 
404 		if (offset >= sge->length) {
405 			if (mr) {
406 				rxe_put(mr);
407 				mr = NULL;
408 			}
409 			sge++;
410 			dma->cur_sge++;
411 			offset = 0;
412 
413 			if (dma->cur_sge >= dma->num_sge) {
414 				err = -ENOSPC;
415 				goto err2;
416 			}
417 
418 			if (sge->length) {
419 				mr = lookup_mr(pd, access, sge->lkey,
420 					       RXE_LOOKUP_LOCAL);
421 				if (!mr) {
422 					err = -EINVAL;
423 					goto err1;
424 				}
425 			} else {
426 				continue;
427 			}
428 		}
429 
430 		if (bytes > sge->length - offset)
431 			bytes = sge->length - offset;
432 
433 		if (bytes > 0) {
434 			iova = sge->addr + offset;
435 
436 			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
437 			if (err)
438 				goto err2;
439 
440 			offset	+= bytes;
441 			resid	-= bytes;
442 			length	-= bytes;
443 			addr	+= bytes;
444 		}
445 	}
446 
447 	dma->sge_offset = offset;
448 	dma->resid	= resid;
449 
450 	if (mr)
451 		rxe_put(mr);
452 
453 	return 0;
454 
455 err2:
456 	if (mr)
457 		rxe_put(mr);
458 err1:
459 	return err;
460 }
461 
462 int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
463 {
464 	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
465 	int			offset	= dma->sge_offset;
466 	int			resid	= dma->resid;
467 
468 	while (length) {
469 		unsigned int bytes;
470 
471 		if (offset >= sge->length) {
472 			sge++;
473 			dma->cur_sge++;
474 			offset = 0;
475 			if (dma->cur_sge >= dma->num_sge)
476 				return -ENOSPC;
477 		}
478 
479 		bytes = length;
480 
481 		if (bytes > sge->length - offset)
482 			bytes = sge->length - offset;
483 
484 		offset	+= bytes;
485 		resid	-= bytes;
486 		length	-= bytes;
487 	}
488 
489 	dma->sge_offset = offset;
490 	dma->resid	= resid;
491 
492 	return 0;
493 }
494 
495 /* (1) find the mr corresponding to lkey/rkey
496  *     depending on lookup_type
497  * (2) verify that the (qp) pd matches the mr pd
498  * (3) verify that the mr can support the requested access
499  * (4) verify that mr state is valid
500  */
501 struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
502 			 enum rxe_mr_lookup_type type)
503 {
504 	struct rxe_mr *mr;
505 	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
506 	int index = key >> 8;
507 
508 	mr = rxe_pool_get_index(&rxe->mr_pool, index);
509 	if (!mr)
510 		return NULL;
511 
512 	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
513 		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
514 		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
515 		     mr->state != RXE_MR_STATE_VALID)) {
516 		rxe_put(mr);
517 		mr = NULL;
518 	}
519 
520 	return mr;
521 }
522 
523 int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
524 {
525 	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
526 	struct rxe_mr *mr;
527 	int ret;
528 
529 	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
530 	if (!mr) {
531 		pr_err("%s: No MR for key %#x\n", __func__, key);
532 		ret = -EINVAL;
533 		goto err;
534 	}
535 
536 	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
537 		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
538 			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
539 		ret = -EINVAL;
540 		goto err_drop_ref;
541 	}
542 
543 	if (atomic_read(&mr->num_mw) > 0) {
544 		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
545 			__func__);
546 		ret = -EINVAL;
547 		goto err_drop_ref;
548 	}
549 
550 	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
551 		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
552 		ret = -EINVAL;
553 		goto err_drop_ref;
554 	}
555 
556 	mr->state = RXE_MR_STATE_FREE;
557 	ret = 0;
558 
559 err_drop_ref:
560 	rxe_put(mr);
561 err:
562 	return ret;
563 }
564 
565 /* user can (re)register fast MR by executing a REG_MR WQE.
566  * user is expected to hold a reference on the ib mr until the
567  * WQE completes.
568  * Once a fast MR is created this is the only way to change the
569  * private keys. It is the responsibility of the user to maintain
570  * the ib mr keys in sync with rxe mr keys.
571  */
572 int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
573 {
574 	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
575 	u32 key = wqe->wr.wr.reg.key;
576 	u32 access = wqe->wr.wr.reg.access;
577 
578 	/* user can only register MR in free state */
579 	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
580 		pr_warn("%s: mr->lkey = 0x%x not free\n",
581 			__func__, mr->lkey);
582 		return -EINVAL;
583 	}
584 
585 	/* user can only register mr with qp in same protection domain */
586 	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
587 		pr_warn("%s: qp->pd and mr->pd don't match\n",
588 			__func__);
589 		return -EINVAL;
590 	}
591 
592 	/* user is only allowed to change key portion of l/rkey */
593 	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
594 		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
595 			__func__, key, mr->lkey);
596 		return -EINVAL;
597 	}
598 
599 	mr->access = access;
600 	mr->lkey = key;
601 	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
602 	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
603 	mr->state = RXE_MR_STATE_VALID;
604 
605 	return 0;
606 }
607 
608 int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
609 {
610 	struct rxe_mr *mr = to_rmr(ibmr);
611 
612 	/* See IBA 10.6.7.2.6 */
613 	if (atomic_read(&mr->num_mw) > 0)
614 		return -EINVAL;
615 
616 	rxe_cleanup(mr);
617 
618 	return 0;
619 }
620 
621 void rxe_mr_cleanup(struct rxe_pool_elem *elem)
622 {
623 	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
624 	int i;
625 
626 	rxe_put(mr_pd(mr));
627 	ib_umem_release(mr->umem);
628 
629 	if (mr->map) {
630 		for (i = 0; i < mr->num_map; i++)
631 			kfree(mr->map[i]);
632 
633 		kfree(mr->map);
634 	}
635 }
636