xref: /openbmc/linux/drivers/infiniband/sw/rxe/rxe_mr.c (revision 0266a177)
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from the last_key. Set last_key to -1
 * if this is the first key for an MR or MW
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

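/* Check that the byte range [iova, iova + length) falls entirely within
 * the region described by the MR. DMA MRs cover all of kernel memory,
 * so they always pass.
 */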
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->ibmr.type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		if (iova < mr->ibmr.iova || length > mr->ibmr.length ||
		    iova > mr->ibmr.iova + mr->ibmr.length - length)
			return -EFAULT;
		return 0;

	default:
		rxe_dbg_mr(mr, "type (%d) not supported\n", mr->ibmr.type);
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

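/* Build the lkey from the pool index (upper 24 bits) and a random key
 * byte (lower 8 bits). The rkey is only set when remote access was
 * requested. A freshly initialized MR starts in the INVALID state.
 */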
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* Set ibmr->lkey/rkey and also copy them into the private
	 * lkey/rkey. For user MRs these will always be the same; for
	 * cases where the caller 'owns' the key portion they may differ
	 * until the REG_MR WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
}

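/* Allocate the two-level map table: num_map map pages, each holding
 * RXE_BUF_PER_MAP physical buffer descriptors. map_shift and map_mask
 * are precomputed so that lookup_iova() can index the table with shifts.
 */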
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

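/* Initialize an MR that gives access to all of kernel memory. No page
 * map is needed; iova_to_vaddr() and rxe_mr_copy() treat the iova as a
 * kernel virtual address for this MR type.
 */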
void rxe_mr_init_dma(int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->ibmr.type = IB_MR_TYPE_DMA;
}

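/* Initialize a user MR: pin the user pages with ib_umem_get() and record
 * the kernel virtual address and size of each page in the map table.
 */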
int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct sg_page_iter	sg_iter;
	int			num_buf;
	void			*vaddr;
	int err;
	int i;

	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
	if (IS_ERR(umem)) {
		rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n",
			(int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		rxe_dbg_mr(mr, "Unable to allocate memory for map\n");
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				rxe_dbg_mr(mr, "Unable to get virtual address\n");
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->umem = umem;
	mr->access = access;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->ibmr.type = IB_MR_TYPE_USER;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

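/* Initialize a fast-registration MR (IB_MR_TYPE_MEM_REG). The map table
 * is sized for max_pages up front but populated later; the MR starts in
 * the FREE state and only becomes VALID when a REG_MR WQE executes
 * (see rxe_reg_fast_mr() below).
 */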
int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->ibmr.type = IB_MR_TYPE_MEM_REG;

	return 0;

err1:
	return err;
}

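/* Translate an iova into a (map index, buffer index, byte offset) triple.
 * When every buffer is page sized (page_shift != 0) this is pure shift
 * and mask arithmetic; otherwise the buffer list is walked linearly.
 */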
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->ibmr.iova + mr->offset;
	int			map_index;
	int			buf_index;
	u64			length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

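/* Return the kernel virtual address backing iova, or NULL if the MR is
 * not valid, the range check fails, or the requested length crosses a
 * buffer boundary. For DMA MRs (no map table) the iova already is a
 * kernel virtual address.
 */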
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		rxe_dbg_mr(mr, "Not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		rxe_dbg_mr(mr, "Range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		rxe_dbg_mr(mr, "Crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * an MR object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;

	if (length == 0)
		return 0;

	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. its sg list,
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum rxe_mr_copy_dir	dir)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mr		*mr	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_put(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_put(mr);

	return 0;

err2:
	if (mr)
		rxe_put(mr);
err1:
	return err;
}

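/* Advance the dma descriptor by length bytes without copying anything,
 * stepping through the sg list exactly as copy_data() would.
 */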
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || ((access & mr->access) != access) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_put(mr);
		mr = NULL;
	}

	return mr;
}

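/* Handle a local or remote invalidate: look the MR up by key, verify the
 * key, the MR type and that no memory windows are still bound, then move
 * the MR back to the FREE state.
 */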
int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		rxe_dbg_qp(qp, "No MR for key %#x\n", key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
			key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n");
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) {
		rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_put(mr);
err:
	return ret;
}

/* The user can (re)register a fast MR by executing a REG_MR WQE.
 * The user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to keep the
 * ib mr keys in sync with the rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		rxe_dbg_mr(mr, "qp->pd and mr->pd don't match\n");
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n",
			key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

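/* Implement the ib_dereg_mr verb. Deregistration is refused while memory
 * windows are still bound to the MR; otherwise the MR is returned to the
 * pool via rxe_cleanup().
 */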
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	/* See IBA 10.6.7.2.6 */
	if (atomic_read(&mr->num_mw) > 0)
		return -EINVAL;

	rxe_cleanup(mr);

	return 0;
}

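/* Pool cleanup callback for an MR: drop the PD reference, release the
 * umem (if any) and free the two-level map table.
 */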
void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
	int i;

	rxe_put(mr_pd(mr));
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}
629