// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8-bit key value that is
 * different from last_key. Pass last_key as -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

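/* Check that [iova, iova + length - 1] lies entirely inside the range
 * registered for this MR. DMA MRs cover all of memory, so any range is
 * accepted for them.
 */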
int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case RXE_MR_TYPE_DMA:
		return 0;

	case RXE_MR_TYPE_MR:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				| IB_ACCESS_REMOTE_WRITE	\
				| IB_ACCESS_REMOTE_ATOMIC)

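/* Build the lkey from the pool index (upper 24 bits) and a fresh random
 * variant byte (lower 8 bits). The rkey is only set when remote access
 * was requested, otherwise it stays 0.
 */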
static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->pelem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	mr->ibmr.lkey = lkey;
	mr->ibmr.rkey = rkey;
	mr->state = RXE_MR_STATE_INVALID;
	mr->type = RXE_MR_TYPE_NONE;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

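/* Allocate the two-level map table: enough struct rxe_map chunks to hold
 * num_buf physical buffer entries, RXE_BUF_PER_MAP entries per chunk.
 */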
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

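/* Initialize a DMA MR. No map table is built; iova_to_vaddr() and
 * rxe_mr_copy() treat the iova of a DMA MR directly as a kernel
 * virtual address.
 */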
void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_DMA;
}

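/* Register a user memory region: pin the pages with ib_umem_get(),
 * allocate a map table large enough for them and record the kernel
 * virtual address and size of every page in the map entries.
 */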
int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf = NULL;
	struct ib_umem		*umem;
	struct sg_page_iter	sg_iter;
	int			num_buf;
	void			*vaddr;
	int err;
	int i;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
				__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf			= 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
						__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

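/* Set up an MR used for fast registration: allocate a map table for up
 * to max_pages buffers and leave the MR in the FREE state; the rkey is
 * made equal to the lkey.
 */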
int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	rxe_mr_init(0, mr);

	/* In fastreg, we also set the rkey */
	mr->ibmr.rkey = mr->ibmr.lkey;

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = RXE_MR_TYPE_MR;

	return 0;

err1:
	return err;
}

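/* Translate an iova into map table coordinates: the index of the map
 * chunk (*m_out), the buffer entry within that chunk (*n_out) and the
 * byte offset into that buffer (*offset_out). When all buffers are full
 * pages (page_shift is set) this is pure shift-and-mask arithmetic:
 *
 *	index   = (iova - mr->iova + mr->offset) >> page_shift
 *	*n_out  = index & map_mask
 *	*m_out  = index >> map_shift
 *
 * otherwise the buffer list is walked linearly.
 */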
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int			map_index;
	int			buf_index;
	u64			length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

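/* Return the kernel virtual address backing iova, or NULL if the MR is
 * not valid, the range check fails or the requested length would cross
 * a buffer (page) boundary. For a DMA MR (no map table) the iova is
 * returned unchanged.
 */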
void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* Copy data between the buffer at (addr, addr + length - 1) and an MR
 * object, starting at iova; the direction is selected by dir.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int			err;
	int			bytes;
	u8			*va;
	struct rxe_map		**map;
	struct rxe_phys_buf	*buf;
	int			m;
	int			i;
	size_t			offset;

	if (length == 0)
		return 0;

	if (mr->type == RXE_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf	= map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va	= (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes	= buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length	-= bytes;
		addr	+= bytes;

		offset	= 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum rxe_mr_copy_dir	dir)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mr		*mr	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_drop_ref(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset	+= bytes;
			resid	-= bytes;
			length	-= bytes;
			addr	+= bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	if (mr)
		rxe_drop_ref(mr);

	return 0;

err2:
	if (mr)
		rxe_drop_ref(mr);
err1:
	return err;
}

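/* Advance the sg list state of a dma descriptor (cur_sge, sge_offset
 * and resid) by length bytes without copying any data.
 */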
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset	+= bytes;
		resid	-= bytes;
		length	-= bytes;
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr_lkey(mr) != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr_rkey(mr) != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_drop_ref(mr);
		mr = NULL;
	}

	return mr;
}

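/* Invalidate the MR identified by rkey, moving it to the FREE state.
 * Fails if no MR is found for the key, the rkey does not match, or
 * memory windows are still bound to the MR.
 */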
int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
	if (!mr) {
		pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
		ret = -EINVAL;
		goto err;
	}

	if (rkey != mr->ibmr.rkey) {
		pr_err("%s: rkey (%#x) doesn't match mr->ibmr.rkey (%#x)\n",
			__func__, rkey, mr->ibmr.rkey);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_drop_ref(mr);
err:
	return ret;
}

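/* Deregister an MR: refuse if memory windows are still bound to it,
 * otherwise mark it ZOMBIE and drop the references that keep it (and
 * its PD) alive.
 */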
int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to deregister an MR while bound to MWs\n",
			__func__);
		return -EINVAL;
	}

	mr->state = RXE_MR_STATE_ZOMBIE;
	rxe_drop_ref(mr_pd(mr));
	rxe_drop_index(mr);
	rxe_drop_ref(mr);

	return 0;
}

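/* Pool cleanup callback: release the pinned umem (if any) and free the
 * map table once the MR is no longer referenced.
 */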
void rxe_mr_cleanup(struct rxe_pool_entry *arg)
{
	struct rxe_mr *mr = container_of(arg, typeof(*mr), pelem);
	int i;

	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}
589