xref: /openbmc/linux/drivers/infiniband/sw/rxe/rxe_mr.c (revision 87875c10)
// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from last_key. Pass last_key as -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

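/* Check that [iova, iova + length) lies entirely inside the registered
 * region. DMA MRs cover the whole address space and always pass;
 * unknown MR types always fail.
 */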
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

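/* Common MR setup: build a new lkey from the pool index and a random
 * 8 bit key, copy it to the rkey only if remote access was requested,
 * and start the MR out as invalid and untyped.
 */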
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

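/* Allocate the two-level map table that will hold up to num_buf
 * physical buffer descriptors, RXE_BUF_PER_MAP entries per map chunk.
 */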
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
	/* clear the dangling pointer so rxe_mr_cleanup() can't free it again */
	mr->map = NULL;
err1:
	return -ENOMEM;
}

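/* Set up a DMA MR. No page map is needed: an iova in a DMA MR is used
 * directly as a kernel virtual address (see iova_to_vaddr()).
 */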
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

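/* Register a user memory region: pin the user pages with ib_umem_get(),
 * allocate the map table and record the kernel virtual address and size
 * of each page so that iova lookups can be resolved later.
 */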
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct sg_page_iter	sg_iter;
	int			num_buf;
	void			*vaddr;
	int err;
	int i;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	mr->umem = umem;
	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
				__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
						__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
	mr->map = NULL;
err_release_umem:
	ib_umem_release(umem);
	/* rxe_mr_cleanup() must not release the umem a second time */
	mr->umem = NULL;
err_out:
	return err;
}

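/* Prepare an MR for fast registration (used for IB_MR_TYPE_MEM_REG):
 * the map table is sized for max_pages but left unpopulated, and the
 * MR stays in the FREE state until a subsequent registration work
 * request fills it in.
 */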
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

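/* Translate an iova inside the MR into a (map index, buffer index,
 * offset) triple. When the buffers are uniform pages (page_shift is
 * set) this is pure shifting and masking; otherwise walk the buffer
 * sizes one entry at a time.
 */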
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int			map_index;
	int			buf_index;
	u64			length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

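/* Return the kernel virtual address that corresponds to iova, or NULL
 * if the MR is not valid, the range check fails, or length bytes at
 * iova would cross a buffer boundary. For DMA MRs the iova is returned
 * unchanged.
 */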
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* Copy data between the range (addr, addr+length-1) and the MR object,
 * starting at iova within the MR; dir selects the direction. Update
 * the incremental crc32 value if crcp is not NULL. The caller must
 * hold a reference to the MR.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir, u32 *crcp)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;
	u32			crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp, dest,
					  length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf	= map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va	= (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes	= buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
					bytes);

		length	-= bytes;
		addr	+= bytes;

		offset	= 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* Copy data into or out of a wqe's sg list,
 * under the control of a dma descriptor.
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum rxe_mr_copy_dir	dir,
	u32			*crcp)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mr		*mr	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset	+= bytes;
			resid	-= bytes;
			length	-= bytes;
			addr	+= bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}

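/* Skip over length bytes of the dma descriptor's sg list without
 * copying anything, advancing the current sge, offset and residual
 * byte count.
 */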
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset	+= bytes;
		resid	-= bytes;
		length	-= bytes;
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	return 0;
}

/* (1) find the MR corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the MR pd
 * (3) verify that the MR can support the requested access
 * (4) verify that the MR state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}

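/* Handle a local or remote invalidate operation: look the MR up by
 * rkey, check that the key matches and that no memory windows are
 * still bound to the MR, then move it to the FREE state.
 */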
int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr) {
		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
		ret = -EINVAL;
		goto err;
	}

	if (rkey != mr->ibmr.rkey) {
		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
			__func__, rkey, mr->ibmr.rkey);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}

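/* ib_dereg_mr() entry point: refuse to deregister while memory windows
 * are still bound to the MR, otherwise mark it as a zombie and drop
 * the PD reference, the pool index and the final MR reference.
 */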
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

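/* Pool cleanup callback, run when the last reference to the MR is
 * dropped: release the pinned umem (if any) and free the map table.
 */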
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}
604