xref: /openbmc/linux/drivers/infiniband/hw/mlx5/mr.c (revision cd5d5810)
/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
	DEF_CACHE_SIZE	= 10,
};

enum {
	MLX5_UMR_ALIGN	= 2048
};

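/* Round @ptr up to the next @align-byte boundary (align must be a power of
 * two); used to satisfy the alignment requirement of the UMR PAS buffer.
 */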
static __be64 *mr_align(__be64 *ptr, int align)
{
	unsigned long mask = align - 1;

	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

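/* Map an allocation order to an index into cache->ent[]; orders below the
 * smallest cached order fall back to entry 0.
 */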
static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

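/* Preallocate @num mkeys for cache entry @c.  Each mkey is created in the
 * free state with UMR enabled so that it can later be bound to a user
 * registration through a UMR work request.
 */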
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			goto out;
		}
		mr->order = ent->order;
		mr->umred = 1;
		in->seg.status = 1 << 6; /* free */
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
					    sizeof(*in));
		if (err) {
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			goto out;
		}
		cache->last_add = jiffies;

		spin_lock(&ent->lock);
		list_add_tail(&mr->list, &ent->head);
		ent->cur++;
		ent->size++;
		spin_unlock(&ent->lock);
	}

out:
	kfree(in);
	return err;
}

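/* Destroy up to @num mkeys from the head of cache entry @c, stopping early
 * if the entry runs empty.
 */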
static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

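/* debugfs "size" attribute: writing a value grows or shrinks the number of
 * mkeys held by this cache entry (never below its limit); reading returns
 * the current size.
 */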
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (count > sizeof(lbuf) - 1)
		return -EINVAL;

	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	lbuf[count] = 0;
	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		err = add_keys(dev, c, var - ent->size);
		if (err)
			return err;
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

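/* debugfs "limit" attribute: writing sets the low-water mark for this cache
 * entry and, if the current count is below it, creates mkeys up to twice
 * the new limit; reading returns the current limit.
 */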
static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (count > sizeof(lbuf) - 1)
		return -EINVAL;

	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	lbuf[count] = 0;
	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

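/* Background resizing of one cache entry: keep the number of free mkeys
 * between limit and 2 * limit.  Shrinking only starts once no entry is
 * below its limit and nothing has been added for 60 seconds; otherwise it
 * is retried later from delayed work.
 */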
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit) {
		add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit)
			queue_work(cache->wq, &ent->work);
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 60 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

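/* Take a free mkey for @order from the cache, falling back to higher-order
 * entries when the exact one is empty.  Entries found empty have their work
 * item queued so they are refilled in the background, as is the entry we
 * took from if it drops below its limit.  A complete miss is accounted in
 * the requested entry.
 */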
static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock(&ent->lock);

		queue_work(cache->wq, &ent->work);

		if (mr)
			break;
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

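/* Return an mkey to its cache entry; if the entry has grown beyond twice
 * its limit, kick the work item so the surplus is trimmed.
 */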
static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

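/* Destroy every mkey still held by cache entry @c; used at teardown after
 * the cache has been stopped.
 */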
static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name,  cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

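/* Set up the MR cache: one entry per order (starting at order 2), each with
 * its own work items, and limits taken from the device profile when
 * MLX5_PROF_MASK_MR_CACHE is set.  Initial population is left to the queued
 * work.  A debugfs failure is reported but is not fatal.
 */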
int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int size;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
			size = dev->mdev.profile->mr_cache[i].size;
			limit = dev->mdev.profile->mr_cache[i].limit;
		} else {
			size = DEF_CACHE_SIZE;
			limit = 0;
		}
		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = &dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

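/* Length, in translation-table octowords, of the page list that covers
 * @len bytes starting at @addr.  Translation entries are 8 bytes and the
 * device appears to count them in pairs, hence (npages + 1) / 2.
 */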
static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

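/* Registrations up to order 17 go through the UMR path and the MR cache;
 * larger regions fall back to reg_create().
 */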
static int use_umr(int order)
{
	return order <= 17;
}

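/* Build the UMR work request that binds a cached mkey to a user buffer:
 * the scatter entry points at the DMA-mapped page list and the fast_reg
 * fields carry the new translation (iova, length, page shift, access).
 * Note the pd is passed through the fast_reg.page_list pointer (cast),
 * presumably consumed when the UMR WQE is built.
 */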
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.page_list_len = n;
	wr->wr.fast_reg.page_shift = page_shift;
	wr->wr.fast_reg.rkey = key;
	wr->wr.fast_reg.iova_start = virt_addr;
	wr->wr.fast_reg.length = len;
	wr->wr.fast_reg.access_flags = access_flags;
	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.rkey = key;
}

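/* Completion handler for the UMR CQ: drain all available completions,
 * record each one's status in its mlx5_ib_mr and wake up the waiter, then
 * re-arm the CQ.
 */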
void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_mr *mr;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
		mr->status = wc.status;
		complete(&mr->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

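/* Register a user buffer by recycling a cached mkey through a UMR work
 * request.  The page list is written into a DMA-mapped, 2KB-aligned buffer,
 * the request is posted on the UMR QP, and the caller sleeps until the
 * completion handler signals it.  Returns -EAGAIN if no cached mkey could
 * be obtained.
 */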
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size = sizeof(u64) * npages;
	int err;
	int i;

	for (i = 0; i < 10; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err) {
			mlx5_ib_warn(dev, "add_keys failed\n");
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr->pas) {
		err = -ENOMEM;
		goto error;
	}

	mlx5_ib_populate_pas(dev, umem, page_shift,
			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1);

	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
				 DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, mr->dma)) {
		kfree(mr->pas);
		err = -ENOMEM;
		goto error;
	}

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key,
			 page_shift, virt_addr, len, access_flags);

	/* We serialize polls so that one process does not steal another
	 * process's completion. This is not a problem since a WR completes in
	 * around 1 usec.
	 */
	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		up(&umrc->sem);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);

	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
	kfree(mr->pas);

	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "reg umr failed\n");
		err = -EFAULT;
		goto error;
	}

	return mr;

error:
	free_cached_mr(dev, mr);
	return ERR_PTR(err);
}

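/* Register a memory region without going through the UMR path: build the
 * full mkey mailbox, including the page list, and create the mkey with a
 * firmware command.  Used for regions too large for the cache or when the
 * cache is empty.
 */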
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
							1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mlx5_vfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	mlx5_vfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
		    start, virt_addr, length);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed\n");
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
			mr = NULL;
		}
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	spin_lock(&dev->mr_lock);
	dev->mdev.priv.reg_pages += npages;
	spin_unlock(&dev->mr_lock);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

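/* Release a UMR-registered mkey: post an unreg UMR work request and wait
 * for its completion, mirroring the registration path.
 */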
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	int err;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);
	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

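/* Destroy an MR.  mkeys that came from the cache (umred) are invalidated
 * with a UMR and returned to the cache; others are destroyed through the
 * firmware and freed here.  Page accounting is updated for user regions.
 */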
int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int npages = mr->npages;
	int umred = mr->umred;
	int err;

	if (!umred) {
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (umem) {
		ib_umem_release(umem);
		spin_lock(&dev->mr_lock);
		dev->mdev.priv.reg_pages -= npages;
		spin_unlock(&dev->mr_lock);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

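/* Allocate an mkey for fast register work requests: created in the free
 * state with UMR enabled and sized for @max_page_list_len translation
 * entries.
 */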
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = 1 << 6; /* free */
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/* TBD: not needed - issue 197292 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
	kfree(in);
	if (err)
		goto err_free;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

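/* Allocate a fast register page list along with a DMA-coherent copy
 * (mapped_page_list) for the device; the mapping is expected to be 64-byte
 * aligned (see the WARN_ON).
 */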
struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}
1004