xref: /openbmc/linux/drivers/vdpa/mlx5/core/mr.c (revision 5ebfa90bdd3d78f4967dc0095daf755989a999e0)
1 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
2 /* Copyright (c) 2020 Mellanox Technologies Ltd. */
3 
4 #include <linux/vhost_types.h>
5 #include <linux/vdpa.h>
6 #include <linux/gcd.h>
7 #include <linux/string.h>
8 #include <linux/mlx5/qp.h>
9 #include "mlx5_vdpa.h"
10 
/*
 * DIV_ROUND_UP where the divider is a power of 2 given by its log base 2
 * value: evaluates to ceil(_n / 2^_s) as a u64.
 *
 * The rounding term is built with a 64-bit shift so that shift counts of 31
 * and above are well defined (a plain "1 << s" is an int shift).
 */
#define MLX5_DIV_ROUND_UP_POW2(_n, _s) \
({ \
	u64 __s = (_s); \
	u64 _res; \
	_res = (((_n) + (1ULL << __s) - 1) >> __s); \
	_res; \
})
19 
20 static int get_octo_len(u64 len, int page_shift)
21 {
22 	u64 page_size = 1ULL << page_shift;
23 	int npages;
24 
25 	npages = ALIGN(len, page_size) >> page_shift;
26 	return (npages + 1) / 2;
27 }
28 
29 static void mlx5_set_access_mode(void *mkc, int mode)
30 {
31 	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
32 	MLX5_SET(mkc, mkc, access_mode_4_2, mode >> 2);
33 }
34 
35 static void populate_mtts(struct mlx5_vdpa_direct_mr *mr, __be64 *mtt)
36 {
37 	struct scatterlist *sg;
38 	int nsg = mr->nsg;
39 	u64 dma_addr;
40 	u64 dma_len;
41 	int j = 0;
42 	int i;
43 
44 	for_each_sg(mr->sg_head.sgl, sg, mr->nent, i) {
45 		for (dma_addr = sg_dma_address(sg), dma_len = sg_dma_len(sg);
46 		     nsg && dma_len;
47 		     nsg--, dma_addr += BIT(mr->log_size), dma_len -= BIT(mr->log_size))
48 			mtt[j++] = cpu_to_be64(dma_addr);
49 	}
50 }
51 
52 static int create_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
53 {
54 	int inlen;
55 	void *mkc;
56 	void *in;
57 	int err;
58 
59 	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + roundup(MLX5_ST_SZ_BYTES(mtt) * mr->nsg, 16);
60 	in = kvzalloc(inlen, GFP_KERNEL);
61 	if (!in)
62 		return -ENOMEM;
63 
64 	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
65 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
66 	MLX5_SET(mkc, mkc, lw, !!(mr->perm & VHOST_MAP_WO));
67 	MLX5_SET(mkc, mkc, lr, !!(mr->perm & VHOST_MAP_RO));
68 	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_MTT);
69 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
70 	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
71 	MLX5_SET64(mkc, mkc, start_addr, mr->offset);
72 	MLX5_SET64(mkc, mkc, len, mr->end - mr->start);
73 	MLX5_SET(mkc, mkc, log_page_size, mr->log_size);
74 	MLX5_SET(mkc, mkc, translations_octword_size,
75 		 get_octo_len(mr->end - mr->start, mr->log_size));
76 	MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
77 		 get_octo_len(mr->end - mr->start, mr->log_size));
78 	populate_mtts(mr, MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt));
79 	err = mlx5_vdpa_create_mkey(mvdev, &mr->mr, in, inlen);
80 	kvfree(in);
81 	if (err) {
82 		mlx5_vdpa_warn(mvdev, "Failed to create direct MR\n");
83 		return err;
84 	}
85 
86 	return 0;
87 }
88 
89 static void destroy_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
90 {
91 	mlx5_vdpa_destroy_mkey(mvdev, mr->mr);
92 }
93 
94 static u64 map_start(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
95 {
96 	return max_t(u64, map->start, mr->start);
97 }
98 
99 static u64 map_end(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
100 {
101 	return min_t(u64, map->last + 1, mr->end);
102 }
103 
104 static u64 maplen(struct vhost_iotlb_map *map, struct mlx5_vdpa_direct_mr *mr)
105 {
106 	return map_end(map, mr) - map_start(map, mr);
107 }
108 
109 #define MLX5_VDPA_INVALID_START_ADDR ((u64)-1)
110 #define MLX5_VDPA_INVALID_LEN ((u64)-1)
111 
112 static u64 indir_start_addr(struct mlx5_vdpa_mr *mkey)
113 {
114 	struct mlx5_vdpa_direct_mr *s;
115 
116 	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
117 	if (!s)
118 		return MLX5_VDPA_INVALID_START_ADDR;
119 
120 	return s->start;
121 }
122 
123 static u64 indir_len(struct mlx5_vdpa_mr *mkey)
124 {
125 	struct mlx5_vdpa_direct_mr *s;
126 	struct mlx5_vdpa_direct_mr *e;
127 
128 	s = list_first_entry_or_null(&mkey->head, struct mlx5_vdpa_direct_mr, list);
129 	if (!s)
130 		return MLX5_VDPA_INVALID_LEN;
131 
132 	e = list_last_entry(&mkey->head, struct mlx5_vdpa_direct_mr, list);
133 
134 	return e->end - s->start;
135 }
136 
137 #define LOG_MAX_KLM_SIZE 30
138 #define MAX_KLM_SIZE BIT(LOG_MAX_KLM_SIZE)
139 
140 static u32 klm_bcount(u64 size)
141 {
142 	return (u32)size;
143 }
144 
145 static void fill_indir(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey, void *in)
146 {
147 	struct mlx5_vdpa_direct_mr *dmr;
148 	struct mlx5_klm *klmarr;
149 	struct mlx5_klm *klm;
150 	bool first = true;
151 	u64 preve;
152 	int i;
153 
154 	klmarr = MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
155 	i = 0;
156 	list_for_each_entry(dmr, &mkey->head, list) {
157 again:
158 		klm = &klmarr[i++];
159 		if (first) {
160 			preve = dmr->start;
161 			first = false;
162 		}
163 
164 		if (preve == dmr->start) {
165 			klm->key = cpu_to_be32(dmr->mr);
166 			klm->bcount = cpu_to_be32(klm_bcount(dmr->end - dmr->start));
167 			preve = dmr->end;
168 		} else {
169 			klm->key = cpu_to_be32(mvdev->res.null_mkey);
170 			klm->bcount = cpu_to_be32(klm_bcount(dmr->start - preve));
171 			preve = dmr->start;
172 			goto again;
173 		}
174 	}
175 }
176 
/*
 * Size in bytes reserved for @nklms KLM entries: the count is rounded up to
 * a multiple of four and each entry takes 16 bytes.
 */
static int klm_byte_size(int nklms)
{
	int padded = ALIGN(nklms, 4);

	return 16 * padded;
}
181 
182 static int create_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
183 {
184 	int inlen;
185 	void *mkc;
186 	void *in;
187 	int err;
188 	u64 start;
189 	u64 len;
190 
191 	start = indir_start_addr(mr);
192 	len = indir_len(mr);
193 	if (start == MLX5_VDPA_INVALID_START_ADDR || len == MLX5_VDPA_INVALID_LEN)
194 		return -EINVAL;
195 
196 	inlen = MLX5_ST_SZ_BYTES(create_mkey_in) + klm_byte_size(mr->num_klms);
197 	in = kzalloc(inlen, GFP_KERNEL);
198 	if (!in)
199 		return -ENOMEM;
200 
201 	MLX5_SET(create_mkey_in, in, uid, mvdev->res.uid);
202 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
203 	MLX5_SET(mkc, mkc, lw, 1);
204 	MLX5_SET(mkc, mkc, lr, 1);
205 	mlx5_set_access_mode(mkc, MLX5_MKC_ACCESS_MODE_KLMS);
206 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
207 	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
208 	MLX5_SET64(mkc, mkc, start_addr, start);
209 	MLX5_SET64(mkc, mkc, len, len);
210 	MLX5_SET(mkc, mkc, translations_octword_size, klm_byte_size(mr->num_klms) / 16);
211 	MLX5_SET(create_mkey_in, in, translations_octword_actual_size, mr->num_klms);
212 	fill_indir(mvdev, mr, in);
213 	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
214 	kfree(in);
215 	return err;
216 }
217 
218 static void destroy_indirect_key(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mkey)
219 {
220 	mlx5_vdpa_destroy_mkey(mvdev, mkey->mkey);
221 }
222 
223 static int map_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr,
224 			 struct vhost_iotlb *iotlb)
225 {
226 	struct vhost_iotlb_map *map;
227 	unsigned long lgcd = 0;
228 	int log_entity_size;
229 	unsigned long size;
230 	u64 start = 0;
231 	int err;
232 	struct page *pg;
233 	unsigned int nsg;
234 	int sglen;
235 	u64 pa;
236 	u64 paend;
237 	struct scatterlist *sg;
238 	struct device *dma = mvdev->vdev.dma_dev;
239 
240 	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
241 	     map; map = vhost_iotlb_itree_next(map, start, mr->end - 1)) {
242 		size = maplen(map, mr);
243 		lgcd = gcd(lgcd, size);
244 		start += size;
245 	}
246 	log_entity_size = ilog2(lgcd);
247 
248 	sglen = 1 << log_entity_size;
249 	nsg = MLX5_DIV_ROUND_UP_POW2(mr->end - mr->start, log_entity_size);
250 
251 	err = sg_alloc_table(&mr->sg_head, nsg, GFP_KERNEL);
252 	if (err)
253 		return err;
254 
255 	sg = mr->sg_head.sgl;
256 	for (map = vhost_iotlb_itree_first(iotlb, mr->start, mr->end - 1);
257 	     map; map = vhost_iotlb_itree_next(map, mr->start, mr->end - 1)) {
258 		paend = map->addr + maplen(map, mr);
259 		for (pa = map->addr; pa < paend; pa += sglen) {
260 			pg = pfn_to_page(__phys_to_pfn(pa));
261 			if (!sg) {
262 				mlx5_vdpa_warn(mvdev, "sg null. start 0x%llx, end 0x%llx\n",
263 					       map->start, map->last + 1);
264 				err = -ENOMEM;
265 				goto err_map;
266 			}
267 			sg_set_page(sg, pg, sglen, 0);
268 			sg = sg_next(sg);
269 			if (!sg)
270 				goto done;
271 		}
272 	}
273 done:
274 	mr->log_size = log_entity_size;
275 	mr->nsg = nsg;
276 	mr->nent = dma_map_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
277 	if (!mr->nent) {
278 		err = -ENOMEM;
279 		goto err_map;
280 	}
281 
282 	err = create_direct_mr(mvdev, mr);
283 	if (err)
284 		goto err_direct;
285 
286 	return 0;
287 
288 err_direct:
289 	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
290 err_map:
291 	sg_free_table(&mr->sg_head);
292 	return err;
293 }
294 
295 static void unmap_direct_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_direct_mr *mr)
296 {
297 	struct device *dma = mvdev->vdev.dma_dev;
298 
299 	destroy_direct_mr(mvdev, mr);
300 	dma_unmap_sg_attrs(dma, mr->sg_head.sgl, mr->nsg, DMA_BIDIRECTIONAL, 0);
301 	sg_free_table(&mr->sg_head);
302 }
303 
304 static int add_direct_chain(struct mlx5_vdpa_dev *mvdev, u64 start, u64 size, u8 perm,
305 			    struct vhost_iotlb *iotlb)
306 {
307 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
308 	struct mlx5_vdpa_direct_mr *dmr;
309 	struct mlx5_vdpa_direct_mr *n;
310 	LIST_HEAD(tmp);
311 	u64 st;
312 	u64 sz;
313 	int err;
314 
315 	st = start;
316 	while (size) {
317 		sz = (u32)min_t(u64, MAX_KLM_SIZE, size);
318 		dmr = kzalloc(sizeof(*dmr), GFP_KERNEL);
319 		if (!dmr) {
320 			err = -ENOMEM;
321 			goto err_alloc;
322 		}
323 
324 		dmr->start = st;
325 		dmr->end = st + sz;
326 		dmr->perm = perm;
327 		err = map_direct_mr(mvdev, dmr, iotlb);
328 		if (err) {
329 			kfree(dmr);
330 			goto err_alloc;
331 		}
332 
333 		list_add_tail(&dmr->list, &tmp);
334 		size -= sz;
335 		mr->num_directs++;
336 		mr->num_klms++;
337 		st += sz;
338 	}
339 	list_splice_tail(&tmp, &mr->head);
340 	return 0;
341 
342 err_alloc:
343 	list_for_each_entry_safe(dmr, n, &mr->head, list) {
344 		list_del_init(&dmr->list);
345 		unmap_direct_mr(mvdev, dmr);
346 		kfree(dmr);
347 	}
348 	return err;
349 }
350 
351 /* The iotlb pointer contains a list of maps. Go over the maps, possibly
352  * merging mergeable maps, and create direct memory keys that provide the
353  * device access to memory. The direct mkeys are then referred to by the
354  * indirect memory key that provides access to the enitre address space given
355  * by iotlb.
356  */
357 static int create_user_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb)
358 {
359 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
360 	struct mlx5_vdpa_direct_mr *dmr;
361 	struct mlx5_vdpa_direct_mr *n;
362 	struct vhost_iotlb_map *map;
363 	u32 pperm = U16_MAX;
364 	u64 last = U64_MAX;
365 	u64 ps = U64_MAX;
366 	u64 pe = U64_MAX;
367 	u64 start = 0;
368 	int err = 0;
369 	int nnuls;
370 
371 	INIT_LIST_HEAD(&mr->head);
372 	for (map = vhost_iotlb_itree_first(iotlb, start, last); map;
373 	     map = vhost_iotlb_itree_next(map, start, last)) {
374 		start = map->start;
375 		if (pe == map->start && pperm == map->perm) {
376 			pe = map->last + 1;
377 		} else {
378 			if (ps != U64_MAX) {
379 				if (pe < map->start) {
380 					/* We have a hole in the map. Check how
381 					 * many null keys are required to fill it.
382 					 */
383 					nnuls = MLX5_DIV_ROUND_UP_POW2(map->start - pe,
384 								       LOG_MAX_KLM_SIZE);
385 					mr->num_klms += nnuls;
386 				}
387 				err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
388 				if (err)
389 					goto err_chain;
390 			}
391 			ps = map->start;
392 			pe = map->last + 1;
393 			pperm = map->perm;
394 		}
395 	}
396 	err = add_direct_chain(mvdev, ps, pe - ps, pperm, iotlb);
397 	if (err)
398 		goto err_chain;
399 
400 	/* Create the memory key that defines the guests's address space. This
401 	 * memory key refers to the direct keys that contain the MTT
402 	 * translations
403 	 */
404 	err = create_indirect_key(mvdev, mr);
405 	if (err)
406 		goto err_chain;
407 
408 	mr->user_mr = true;
409 	return 0;
410 
411 err_chain:
412 	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
413 		list_del_init(&dmr->list);
414 		unmap_direct_mr(mvdev, dmr);
415 		kfree(dmr);
416 	}
417 	return err;
418 }
419 
420 static int create_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
421 {
422 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
423 	void *mkc;
424 	u32 *in;
425 	int err;
426 
427 	in = kzalloc(inlen, GFP_KERNEL);
428 	if (!in)
429 		return -ENOMEM;
430 
431 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
432 
433 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
434 	MLX5_SET(mkc, mkc, length64, 1);
435 	MLX5_SET(mkc, mkc, lw, 1);
436 	MLX5_SET(mkc, mkc, lr, 1);
437 	MLX5_SET(mkc, mkc, pd, mvdev->res.pdn);
438 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
439 
440 	err = mlx5_vdpa_create_mkey(mvdev, &mr->mkey, in, inlen);
441 	if (!err)
442 		mr->user_mr = false;
443 
444 	kfree(in);
445 	return err;
446 }
447 
448 static void destroy_dma_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
449 {
450 	mlx5_vdpa_destroy_mkey(mvdev, mr->mkey);
451 }
452 
453 static int dup_iotlb(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *src)
454 {
455 	struct vhost_iotlb_map *map;
456 	u64 start = 0, last = ULLONG_MAX;
457 	int err;
458 
459 	if (!src) {
460 		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, start, last, start, VHOST_ACCESS_RW);
461 		return err;
462 	}
463 
464 	for (map = vhost_iotlb_itree_first(src, start, last); map;
465 		map = vhost_iotlb_itree_next(map, start, last)) {
466 		err = vhost_iotlb_add_range(mvdev->cvq.iotlb, map->start, map->last,
467 					    map->addr, map->perm);
468 		if (err)
469 			return err;
470 	}
471 	return 0;
472 }
473 
474 static void prune_iotlb(struct mlx5_vdpa_dev *mvdev)
475 {
476 	vhost_iotlb_del_range(mvdev->cvq.iotlb, 0, ULLONG_MAX);
477 }
478 
479 static void destroy_user_mr(struct mlx5_vdpa_dev *mvdev, struct mlx5_vdpa_mr *mr)
480 {
481 	struct mlx5_vdpa_direct_mr *dmr;
482 	struct mlx5_vdpa_direct_mr *n;
483 
484 	destroy_indirect_key(mvdev, mr);
485 	list_for_each_entry_safe_reverse(dmr, n, &mr->head, list) {
486 		list_del_init(&dmr->list);
487 		unmap_direct_mr(mvdev, dmr);
488 		kfree(dmr);
489 	}
490 }
491 
492 void mlx5_vdpa_destroy_mr(struct mlx5_vdpa_dev *mvdev)
493 {
494 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
495 
496 	mutex_lock(&mr->mkey_mtx);
497 	if (!mr->initialized)
498 		goto out;
499 
500 	prune_iotlb(mvdev);
501 	if (mr->user_mr)
502 		destroy_user_mr(mvdev, mr);
503 	else
504 		destroy_dma_mr(mvdev, mr);
505 
506 	memset(mr, 0, sizeof(*mr));
507 	mr->initialized = false;
508 out:
509 	mutex_unlock(&mr->mkey_mtx);
510 }
511 
512 static int _mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev,
513 				struct vhost_iotlb *iotlb, unsigned int asid)
514 {
515 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
516 	int err;
517 
518 	if (mr->initialized)
519 		return 0;
520 
521 	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
522 		if (iotlb)
523 			err = create_user_mr(mvdev, iotlb);
524 		else
525 			err = create_dma_mr(mvdev, mr);
526 
527 		if (err)
528 			return err;
529 	}
530 
531 	if (mvdev->group2asid[MLX5_VDPA_CVQ_GROUP] == asid) {
532 		err = dup_iotlb(mvdev, iotlb);
533 		if (err)
534 			goto out_err;
535 	}
536 
537 	mr->initialized = true;
538 	return 0;
539 
540 out_err:
541 	if (mvdev->group2asid[MLX5_VDPA_DATAVQ_GROUP] == asid) {
542 		if (iotlb)
543 			destroy_user_mr(mvdev, mr);
544 		else
545 			destroy_dma_mr(mvdev, mr);
546 	}
547 
548 	return err;
549 }
550 
551 int mlx5_vdpa_create_mr(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
552 			unsigned int asid)
553 {
554 	int err;
555 
556 	mutex_lock(&mvdev->mr.mkey_mtx);
557 	err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid);
558 	mutex_unlock(&mvdev->mr.mkey_mtx);
559 	return err;
560 }
561 
562 int mlx5_vdpa_handle_set_map(struct mlx5_vdpa_dev *mvdev, struct vhost_iotlb *iotlb,
563 			     bool *change_map, unsigned int asid)
564 {
565 	struct mlx5_vdpa_mr *mr = &mvdev->mr;
566 	int err = 0;
567 
568 	*change_map = false;
569 	mutex_lock(&mr->mkey_mtx);
570 	if (mr->initialized) {
571 		mlx5_vdpa_info(mvdev, "memory map update\n");
572 		*change_map = true;
573 	}
574 	if (!*change_map)
575 		err = _mlx5_vdpa_create_mr(mvdev, iotlb, asid);
576 	mutex_unlock(&mr->mkey_mtx);
577 
578 	return err;
579 }
580