// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different from last_key. Pass last_key as -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

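/* Check that the byte range [iova, iova + length - 1] lies within
 * the registered range of the MR. DMA MRs cover the caller's whole
 * address space, so any range is accepted for them.
 */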
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

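/* Initialize the generic MR fields. The lkey is built from the pool
 * index in the upper 24 bits and a random 8 bit key in the low byte
 * (e.g. index 5 with random key 0xab yields lkey 0x5ab); the rkey is
 * only set if a remote access right was requested.
 */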
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

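/* Allocate the two-level buffer map for num_buf physical buffers:
 * an array of num_map pointers, each to a map page holding up to
 * RXE_BUF_PER_MAP buffer descriptors.
 */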
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

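/* Initialize an MR that covers the caller's whole address space
 * (a DMA MR). No buffer map is needed; an iova in such an MR is
 * used directly as a kernel virtual address.
 */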
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

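/* Register a user memory region: pin the pages covering
 * [start, start + length) with ib_umem_get() and record the kernel
 * virtual address of each page in the MR's buffer map.
 */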
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct sg_page_iter	sg_iter;
	int			num_buf;
	void			*vaddr;
	int			err;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("err %d from ib_umem_get\n",
			(int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err1;
	}

	mr->umem = umem;
	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("err %d from rxe_mr_alloc\n", err);
		ib_umem_release(umem);
		goto err1;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		/* walk the pinned pages and record the kernel virtual
		 * address and size of each one in the buffer map
		 */
		for_each_sg_page(umem->sg_head.sgl, &sg_iter, umem->nmap, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("null vaddr\n");
				ib_umem_release(umem);
				err = -ENOMEM;
				goto err1;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

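/* Initialize an MR used for fast registration. The buffer map is
 * allocated up front for max_pages entries; the pages themselves
 * are filled in later when the MR is mapped and registered.
 */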
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

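/* Translate an iova within the MR into a map index, a buffer index
 * and a byte offset within that buffer. When all buffers are page
 * sized (page_shift is set) this is pure shift/mask arithmetic:
 * e.g. assuming 4K pages and RXE_BUF_PER_MAP == 256, an offset of
 * 0x12345 from the start of the first buffer yields
 * offset_out = 0x345, n_out = 0x12 and m_out = 0. Otherwise the
 * buffer list is walked linearly.
 */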
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

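/* Return the kernel virtual address backing iova, or NULL if the MR
 * is not in the valid state, the range check fails, or the range
 * crosses a buffer boundary. For DMA MRs (no buffer map) iova is
 * already a kernel virtual address.
 */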
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (addr, addr+length-1) to or from
 * a mr object starting at iova. Compute an incremental crc32
 * if crcp is not NULL. The caller must hold a reference to mr.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir, u32 *crcp)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;
	u32			crc = crcp ? (*crcp) : 0;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		if (crcp)
			*crcp = rxe_crc32(to_rdev(mr->ibmr.device), *crcp, dest,
					  length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		if (crcp)
			crc = rxe_crc32(to_rdev(mr->ibmr.device), crc, dest,
					bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	if (crcp)
		*crcp = crc;

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. a sg list,
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum rxe_mr_copy_dir	dir,
	u32			*crcp)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mr		*mr	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir, crcp);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}

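/* Skip over length bytes of the sg list without copying, advancing
 * the current sge, offset and residual count in the dma descriptor.
 */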
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}

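/* Invalidate the MR referenced by rkey, moving it to the free state.
 * Fails if no MR matches rkey or if memory windows are still bound
 * to the MR.
 */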
int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr) {
		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
		ret = -EINVAL;
		goto err;
	}

	if (rkey != mr->ibmr.rkey) {
		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
			__func__, rkey, mr->ibmr.rkey);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}

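/* Deregister an MR on behalf of the ib_dereg_mr() verb. Refuses to
 * deregister an MR that still has memory windows bound to it.
 */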
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

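/* Pool cleanup callback: release the pinned user memory (if any)
 * and free the buffer map once the last reference to the MR is
 * dropped.
 */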
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}