xref: /openbmc/linux/drivers/infiniband/hw/mlx5/mr.c (revision c24c57a4)
1 /*
2  * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
3  *
4  * This software is available to you under a choice of one of two
5  * licenses.  You may choose to be licensed under the terms of the GNU
6  * General Public License (GPL) Version 2, available from the file
7  * COPYING in the main directory of this source tree, or the
8  * OpenIB.org BSD license below:
9  *
10  *     Redistribution and use in source and binary forms, with or
11  *     without modification, are permitted provided that the following
12  *     conditions are met:
13  *
14  *      - Redistributions of source code must retain the above
15  *        copyright notice, this list of conditions and the following
16  *        disclaimer.
17  *
18  *      - Redistributions in binary form must reproduce the above
19  *        copyright notice, this list of conditions and the following
20  *        disclaimer in the documentation and/or other materials
21  *        provided with the distribution.
22  *
23  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
24  * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
25  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
26  * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
27  * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
28  * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
29  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
30  * SOFTWARE.
31  */
32 
33 
34 #include <linux/kref.h>
35 #include <linux/random.h>
36 #include <linux/debugfs.h>
37 #include <linux/export.h>
38 #include <linux/delay.h>
39 #include <rdma/ib_umem.h>
40 #include <rdma/ib_umem_odp.h>
41 #include <rdma/ib_verbs.h>
42 #include "mlx5_ib.h"
43 
44 enum {
45 	MAX_PENDING_REG_MR = 8,
46 };
47 
48 #define MLX5_UMR_ALIGN 2048
49 
50 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
51 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
52 static int mr_cache_max_order(struct mlx5_ib_dev *dev);
53 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
54 
55 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
56 {
57 	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
58 }
59 
60 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
61 {
62 	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
63 
64 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
65 		/* Wait until all page fault handlers using the mr complete. */
66 		synchronize_srcu(&dev->mr_srcu);
67 
68 	return err;
69 }
70 
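/* Map an mkey order to its index in the MR cache entry array. */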
71 static int order2idx(struct mlx5_ib_dev *dev, int order)
72 {
73 	struct mlx5_mr_cache *cache = &dev->cache;
74 
75 	if (order < cache->ent[0].order)
76 		return 0;
77 	else
78 		return order - cache->ent[0].order;
79 }
80 
81 static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
82 {
83 	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
84 		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
85 }
86 
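/*
 * Completion callback for the asynchronous mkey creation issued by
 * add_keys(): on success the new MR is added to its cache entry and to
 * the mkey table, on failure the cache fill delay timer is armed.
 */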
87 static void reg_mr_callback(int status, struct mlx5_async_work *context)
88 {
89 	struct mlx5_ib_mr *mr =
90 		container_of(context, struct mlx5_ib_mr, cb_work);
91 	struct mlx5_ib_dev *dev = mr->dev;
92 	struct mlx5_mr_cache *cache = &dev->cache;
93 	int c = order2idx(dev, mr->order);
94 	struct mlx5_cache_ent *ent = &cache->ent[c];
95 	u8 key;
96 	unsigned long flags;
97 	struct xarray *mkeys = &dev->mdev->priv.mkey_table;
98 	int err;
99 
100 	spin_lock_irqsave(&ent->lock, flags);
101 	ent->pending--;
102 	spin_unlock_irqrestore(&ent->lock, flags);
103 	if (status) {
104 		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
105 		kfree(mr);
106 		dev->fill_delay = 1;
107 		mod_timer(&dev->delay_timer, jiffies + HZ);
108 		return;
109 	}
110 
111 	mr->mmkey.type = MLX5_MKEY_MR;
112 	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
113 	key = dev->mdev->priv.mkey_key++;
114 	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
115 	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;
116 
117 	cache->last_add = jiffies;
118 
119 	spin_lock_irqsave(&ent->lock, flags);
120 	list_add_tail(&mr->list, &ent->head);
121 	ent->cur++;
122 	ent->size++;
123 	spin_unlock_irqrestore(&ent->lock, flags);
124 
125 	xa_lock_irqsave(mkeys, flags);
126 	err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
127 				&mr->mmkey, GFP_ATOMIC));
128 	xa_unlock_irqrestore(mkeys, flags);
129 	if (err)
130 		pr_err("Error inserting into mkey tree. 0x%x\n", -err);
131 
132 	if (!completion_done(&ent->compl))
133 		complete(&ent->compl);
134 }
135 
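/* Asynchronously create up to @num mkeys for cache entry @c. */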
136 static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
137 {
138 	struct mlx5_mr_cache *cache = &dev->cache;
139 	struct mlx5_cache_ent *ent = &cache->ent[c];
140 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
141 	struct mlx5_ib_mr *mr;
142 	void *mkc;
143 	u32 *in;
144 	int err = 0;
145 	int i;
146 
147 	in = kzalloc(inlen, GFP_KERNEL);
148 	if (!in)
149 		return -ENOMEM;
150 
151 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
152 	for (i = 0; i < num; i++) {
153 		if (ent->pending >= MAX_PENDING_REG_MR) {
154 			err = -EAGAIN;
155 			break;
156 		}
157 
158 		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
159 		if (!mr) {
160 			err = -ENOMEM;
161 			break;
162 		}
163 		mr->order = ent->order;
164 		mr->allocated_from_cache = 1;
165 		mr->dev = dev;
166 
167 		MLX5_SET(mkc, mkc, free, 1);
168 		MLX5_SET(mkc, mkc, umr_en, 1);
169 		MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
170 		MLX5_SET(mkc, mkc, access_mode_4_2,
171 			 (ent->access_mode >> 2) & 0x7);
172 
173 		MLX5_SET(mkc, mkc, qpn, 0xffffff);
174 		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
175 		MLX5_SET(mkc, mkc, log_page_size, ent->page);
176 
177 		spin_lock_irq(&ent->lock);
178 		ent->pending++;
179 		spin_unlock_irq(&ent->lock);
180 		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
181 					       &dev->async_ctx, in, inlen,
182 					       mr->out, sizeof(mr->out),
183 					       reg_mr_callback, &mr->cb_work);
184 		if (err) {
185 			spin_lock_irq(&ent->lock);
186 			ent->pending--;
187 			spin_unlock_irq(&ent->lock);
188 			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
189 			kfree(mr);
190 			break;
191 		}
192 	}
193 
194 	kfree(in);
195 	return err;
196 }
197 
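/* Remove up to @num mkeys from cache entry @c and destroy them. */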
198 static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
199 {
200 	struct mlx5_mr_cache *cache = &dev->cache;
201 	struct mlx5_cache_ent *ent = &cache->ent[c];
202 	struct mlx5_ib_mr *tmp_mr;
203 	struct mlx5_ib_mr *mr;
204 	LIST_HEAD(del_list);
205 	int i;
206 
207 	for (i = 0; i < num; i++) {
208 		spin_lock_irq(&ent->lock);
209 		if (list_empty(&ent->head)) {
210 			spin_unlock_irq(&ent->lock);
211 			break;
212 		}
213 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
214 		list_move(&mr->list, &del_list);
215 		ent->cur--;
216 		ent->size--;
217 		spin_unlock_irq(&ent->lock);
218 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
219 	}
220 
221 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
222 		synchronize_srcu(&dev->mr_srcu);
223 
224 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
225 		list_del(&mr->list);
226 		kfree(mr);
227 	}
228 }
229 
230 static ssize_t size_write(struct file *filp, const char __user *buf,
231 			  size_t count, loff_t *pos)
232 {
233 	struct mlx5_cache_ent *ent = filp->private_data;
234 	struct mlx5_ib_dev *dev = ent->dev;
235 	char lbuf[20] = {0};
236 	u32 var;
237 	int err;
238 	int c;
239 
240 	count = min(count, sizeof(lbuf) - 1);
241 	if (copy_from_user(lbuf, buf, count))
242 		return -EFAULT;
243 
244 	c = order2idx(dev, ent->order);
245 
246 	if (sscanf(lbuf, "%u", &var) != 1)
247 		return -EINVAL;
248 
249 	if (var < ent->limit)
250 		return -EINVAL;
251 
252 	if (var > ent->size) {
253 		do {
254 			err = add_keys(dev, c, var - ent->size);
255 			if (err && err != -EAGAIN)
256 				return err;
257 
258 			usleep_range(3000, 5000);
259 		} while (err);
260 	} else if (var < ent->size) {
261 		remove_keys(dev, c, ent->size - var);
262 	}
263 
264 	return count;
265 }
266 
267 static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
268 			 loff_t *pos)
269 {
270 	struct mlx5_cache_ent *ent = filp->private_data;
271 	char lbuf[20];
272 	int err;
273 
274 	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
275 	if (err < 0)
276 		return err;
277 
278 	return simple_read_from_buffer(buf, count, pos, lbuf, err);
279 }
280 
281 static const struct file_operations size_fops = {
282 	.owner	= THIS_MODULE,
283 	.open	= simple_open,
284 	.write	= size_write,
285 	.read	= size_read,
286 };
287 
288 static ssize_t limit_write(struct file *filp, const char __user *buf,
289 			   size_t count, loff_t *pos)
290 {
291 	struct mlx5_cache_ent *ent = filp->private_data;
292 	struct mlx5_ib_dev *dev = ent->dev;
293 	char lbuf[20] = {0};
294 	u32 var;
295 	int err;
296 	int c;
297 
298 	count = min(count, sizeof(lbuf) - 1);
299 	if (copy_from_user(lbuf, buf, count))
300 		return -EFAULT;
301 
302 	c = order2idx(dev, ent->order);
303 
304 	if (sscanf(lbuf, "%u", &var) != 1)
305 		return -EINVAL;
306 
307 	if (var > ent->size)
308 		return -EINVAL;
309 
310 	ent->limit = var;
311 
312 	if (ent->cur < ent->limit) {
313 		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
314 		if (err)
315 			return err;
316 	}
317 
318 	return count;
319 }
320 
321 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
322 			  loff_t *pos)
323 {
324 	struct mlx5_cache_ent *ent = filp->private_data;
325 	char lbuf[20];
326 	int err;
327 
328 	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
329 	if (err < 0)
330 		return err;
331 
332 	return simple_read_from_buffer(buf, count, pos, lbuf, err);
333 }
334 
335 static const struct file_operations limit_fops = {
336 	.owner	= THIS_MODULE,
337 	.open	= simple_open,
338 	.write	= limit_write,
339 	.read	= limit_read,
340 };
341 
342 static int someone_adding(struct mlx5_mr_cache *cache)
343 {
344 	int i;
345 
346 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
347 		if (cache->ent[i].cur < cache->ent[i].limit)
348 			return 1;
349 	}
350 
351 	return 0;
352 }
353 
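/*
 * Cache maintenance work: refill an entry that dropped below 2 * limit,
 * or lazily shrink one that grew beyond 2 * limit.
 */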
354 static void __cache_work_func(struct mlx5_cache_ent *ent)
355 {
356 	struct mlx5_ib_dev *dev = ent->dev;
357 	struct mlx5_mr_cache *cache = &dev->cache;
358 	int i = order2idx(dev, ent->order);
359 	int err;
360 
361 	if (cache->stopped)
362 		return;
363 
364 	ent = &dev->cache.ent[i];
365 	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
366 		err = add_keys(dev, i, 1);
367 		if (ent->cur < 2 * ent->limit) {
368 			if (err == -EAGAIN) {
369 				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
370 					    i + 2);
371 				queue_delayed_work(cache->wq, &ent->dwork,
372 						   msecs_to_jiffies(3));
373 			} else if (err) {
374 				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
375 					     i + 2, err);
376 				queue_delayed_work(cache->wq, &ent->dwork,
377 						   msecs_to_jiffies(1000));
378 			} else {
379 				queue_work(cache->wq, &ent->work);
380 			}
381 		}
382 	} else if (ent->cur > 2 * ent->limit) {
383 		/*
384 		 * The remove_keys() logic is performed as a garbage collection
385 		 * task.  Such a task is intended to run when no other active
386 		 * processes are running.
387 		 *
388 		 * need_resched() returns TRUE if there are user tasks to be
389 		 * activated in the near future.
390 		 *
391 		 * In that case, we don't execute remove_keys() and postpone the
392 		 * garbage collection work to the next cycle, in order to free
393 		 * CPU resources for other tasks.
394 		 */
395 		if (!need_resched() && !someone_adding(cache) &&
396 		    time_after(jiffies, cache->last_add + 300 * HZ)) {
397 			remove_keys(dev, i, 1);
398 			if (ent->cur > ent->limit)
399 				queue_work(cache->wq, &ent->work);
400 		} else {
401 			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
402 		}
403 	}
404 }
405 
406 static void delayed_cache_work_func(struct work_struct *work)
407 {
408 	struct mlx5_cache_ent *ent;
409 
410 	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
411 	__cache_work_func(ent);
412 }
413 
414 static void cache_work_func(struct work_struct *work)
415 {
416 	struct mlx5_cache_ent *ent;
417 
418 	ent = container_of(work, struct mlx5_cache_ent, work);
419 	__cache_work_func(ent);
420 }
421 
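/*
 * Allocate an MR from a specific cache entry, waiting for an
 * asynchronously created mkey if the entry is currently empty.
 */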
422 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
423 {
424 	struct mlx5_mr_cache *cache = &dev->cache;
425 	struct mlx5_cache_ent *ent;
426 	struct mlx5_ib_mr *mr;
427 	int err;
428 
429 	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
430 		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
431 		return NULL;
432 	}
433 
434 	ent = &cache->ent[entry];
435 	while (1) {
436 		spin_lock_irq(&ent->lock);
437 		if (list_empty(&ent->head)) {
438 			spin_unlock_irq(&ent->lock);
439 
440 			err = add_keys(dev, entry, 1);
441 			if (err && err != -EAGAIN)
442 				return ERR_PTR(err);
443 
444 			wait_for_completion(&ent->compl);
445 		} else {
446 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
447 					      list);
448 			list_del(&mr->list);
449 			ent->cur--;
450 			spin_unlock_irq(&ent->lock);
451 			if (ent->cur < ent->limit)
452 				queue_work(cache->wq, &ent->work);
453 			return mr;
454 		}
455 	}
456 }
457 
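/*
 * Take an MR from the smallest cache entry that fits @order, counting a
 * miss (and returning NULL) if none of the eligible entries has one.
 */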
458 static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
459 {
460 	struct mlx5_mr_cache *cache = &dev->cache;
461 	struct mlx5_ib_mr *mr = NULL;
462 	struct mlx5_cache_ent *ent;
463 	int last_umr_cache_entry;
464 	int c;
465 	int i;
466 
467 	c = order2idx(dev, order);
468 	last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
469 	if (c < 0 || c > last_umr_cache_entry) {
470 		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
471 		return NULL;
472 	}
473 
474 	for (i = c; i <= last_umr_cache_entry; i++) {
475 		ent = &cache->ent[i];
476 
477 		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);
478 
479 		spin_lock_irq(&ent->lock);
480 		if (!list_empty(&ent->head)) {
481 			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
482 					      list);
483 			list_del(&mr->list);
484 			ent->cur--;
485 			spin_unlock_irq(&ent->lock);
486 			if (ent->cur < ent->limit)
487 				queue_work(cache->wq, &ent->work);
488 			break;
489 		}
490 		spin_unlock_irq(&ent->lock);
491 
492 		queue_work(cache->wq, &ent->work);
493 	}
494 
495 	if (!mr)
496 		cache->ent[c].miss++;
497 
498 	return mr;
499 }
500 
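/*
 * Return a cache-allocated MR to its entry; if the mkey cannot be
 * invalidated through UMR it is destroyed instead of being cached.
 */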
501 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
502 {
503 	struct mlx5_mr_cache *cache = &dev->cache;
504 	struct mlx5_cache_ent *ent;
505 	int shrink = 0;
506 	int c;
507 
508 	if (!mr->allocated_from_cache)
509 		return;
510 
511 	c = order2idx(dev, mr->order);
512 	WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);
513 
514 	if (unreg_umr(dev, mr)) {
515 		mr->allocated_from_cache = false;
516 		destroy_mkey(dev, mr);
517 		ent = &cache->ent[c];
518 		if (ent->cur < ent->limit)
519 			queue_work(cache->wq, &ent->work);
520 		return;
521 	}
522 
523 	ent = &cache->ent[c];
524 	spin_lock_irq(&ent->lock);
525 	list_add_tail(&mr->list, &ent->head);
526 	ent->cur++;
527 	if (ent->cur > 2 * ent->limit)
528 		shrink = 1;
529 	spin_unlock_irq(&ent->lock);
530 
531 	if (shrink)
532 		queue_work(cache->wq, &ent->work);
533 }
534 
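/* Drain cache entry @c, destroying all of its mkeys. */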
535 static void clean_keys(struct mlx5_ib_dev *dev, int c)
536 {
537 	struct mlx5_mr_cache *cache = &dev->cache;
538 	struct mlx5_cache_ent *ent = &cache->ent[c];
539 	struct mlx5_ib_mr *tmp_mr;
540 	struct mlx5_ib_mr *mr;
541 	LIST_HEAD(del_list);
542 
543 	cancel_delayed_work(&ent->dwork);
544 	while (1) {
545 		spin_lock_irq(&ent->lock);
546 		if (list_empty(&ent->head)) {
547 			spin_unlock_irq(&ent->lock);
548 			break;
549 		}
550 		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
551 		list_move(&mr->list, &del_list);
552 		ent->cur--;
553 		ent->size--;
554 		spin_unlock_irq(&ent->lock);
555 		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
556 	}
557 
558 #ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
559 	synchronize_srcu(&dev->mr_srcu);
560 #endif
561 
562 	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
563 		list_del(&mr->list);
564 		kfree(mr);
565 	}
566 }
567 
568 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
569 {
570 	if (!mlx5_debugfs_root || dev->is_rep)
571 		return;
572 
573 	debugfs_remove_recursive(dev->cache.root);
574 	dev->cache.root = NULL;
575 }
576 
577 static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
578 {
579 	struct mlx5_mr_cache *cache = &dev->cache;
580 	struct mlx5_cache_ent *ent;
581 	struct dentry *dir;
582 	int i;
583 
584 	if (!mlx5_debugfs_root || dev->is_rep)
585 		return;
586 
587 	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
588 
589 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
590 		ent = &cache->ent[i];
591 		sprintf(ent->name, "%d", ent->order);
592 		dir = debugfs_create_dir(ent->name, cache->root);
593 		debugfs_create_file("size", 0600, dir, ent, &size_fops);
594 		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
595 		debugfs_create_u32("cur", 0400, dir, &ent->cur);
596 		debugfs_create_u32("miss", 0600, dir, &ent->miss);
597 	}
598 }
599 
600 static void delay_time_func(struct timer_list *t)
601 {
602 	struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);
603 
604 	dev->fill_delay = 0;
605 }
606 
607 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
608 {
609 	struct mlx5_mr_cache *cache = &dev->cache;
610 	struct mlx5_cache_ent *ent;
611 	int i;
612 
613 	mutex_init(&dev->slow_path_mutex);
614 	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
615 	if (!cache->wq) {
616 		mlx5_ib_warn(dev, "failed to create work queue\n");
617 		return -ENOMEM;
618 	}
619 
620 	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
621 	timer_setup(&dev->delay_timer, delay_time_func, 0);
622 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
623 		ent = &cache->ent[i];
624 		INIT_LIST_HEAD(&ent->head);
625 		spin_lock_init(&ent->lock);
626 		ent->order = i + 2;
627 		ent->dev = dev;
628 		ent->limit = 0;
629 
630 		init_completion(&ent->compl);
631 		INIT_WORK(&ent->work, cache_work_func);
632 		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
633 
634 		if (i > MR_CACHE_LAST_STD_ENTRY) {
635 			mlx5_odp_init_mr_cache_entry(ent);
636 			continue;
637 		}
638 
639 		if (ent->order > mr_cache_max_order(dev))
640 			continue;
641 
642 		ent->page = PAGE_SHIFT;
643 		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
644 			   MLX5_IB_UMR_OCTOWORD;
645 		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
646 		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
647 		    !dev->is_rep &&
648 		    mlx5_core_is_pf(dev->mdev))
649 			ent->limit = dev->mdev->profile->mr_cache[i].limit;
650 		else
651 			ent->limit = 0;
652 		queue_work(cache->wq, &ent->work);
653 	}
654 
655 	mlx5_mr_cache_debugfs_init(dev);
656 
657 	return 0;
658 }
659 
660 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
661 {
662 	int i;
663 
664 	if (!dev->cache.wq)
665 		return 0;
666 
667 	dev->cache.stopped = 1;
668 	flush_workqueue(dev->cache.wq);
669 
670 	mlx5_mr_cache_debugfs_cleanup(dev);
671 	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);
672 
673 	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
674 		clean_keys(dev, i);
675 
676 	destroy_workqueue(dev->cache.wq);
677 	del_timer_sync(&dev->delay_timer);
678 
679 	return 0;
680 }
681 
682 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
683 {
684 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
685 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
686 	struct mlx5_core_dev *mdev = dev->mdev;
687 	struct mlx5_ib_mr *mr;
688 	void *mkc;
689 	u32 *in;
690 	int err;
691 
692 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
693 	if (!mr)
694 		return ERR_PTR(-ENOMEM);
695 
696 	in = kzalloc(inlen, GFP_KERNEL);
697 	if (!in) {
698 		err = -ENOMEM;
699 		goto err_free;
700 	}
701 
702 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
703 
704 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
705 	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
706 	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
707 	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
708 	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
709 	MLX5_SET(mkc, mkc, lr, 1);
710 
711 	MLX5_SET(mkc, mkc, length64, 1);
712 	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
713 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
714 	MLX5_SET64(mkc, mkc, start_addr, 0);
715 
716 	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
717 	if (err)
718 		goto err_in;
719 
720 	kfree(in);
721 	mr->mmkey.type = MLX5_MKEY_MR;
722 	mr->ibmr.lkey = mr->mmkey.key;
723 	mr->ibmr.rkey = mr->mmkey.key;
724 	mr->umem = NULL;
725 
726 	return &mr->ibmr;
727 
728 err_in:
729 	kfree(in);
730 
731 err_free:
732 	kfree(mr);
733 
734 	return ERR_PTR(err);
735 }
736 
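/*
 * Number of translation octowords (each holding two MTT entries)
 * needed to map the region.
 */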
737 static int get_octo_len(u64 addr, u64 len, int page_shift)
738 {
739 	u64 page_size = 1ULL << page_shift;
740 	u64 offset;
741 	int npages;
742 
743 	offset = addr & (page_size - 1);
744 	npages = ALIGN(len + offset, page_size) >> page_shift;
745 	return (npages + 1) / 2;
746 }
747 
748 static int mr_cache_max_order(struct mlx5_ib_dev *dev)
749 {
750 	if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
751 		return MR_CACHE_LAST_STD_ENTRY + 2;
752 	return MLX5_MAX_UMR_SHIFT;
753 }
754 
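/*
 * Pin the user memory (or set up an ODP umem) and report its page
 * count, contiguity and page shift.
 */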
755 static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
756 		       u64 start, u64 length, int access_flags,
757 		       struct ib_umem **umem, int *npages, int *page_shift,
758 		       int *ncont, int *order)
759 {
760 	struct ib_umem *u;
761 
762 	*umem = NULL;
763 
764 	if (access_flags & IB_ACCESS_ON_DEMAND) {
765 		struct ib_umem_odp *odp;
766 
767 		odp = ib_umem_odp_get(udata, start, length, access_flags);
768 		if (IS_ERR(odp)) {
769 			mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
770 				    PTR_ERR(odp));
771 			return PTR_ERR(odp);
772 		}
773 
774 		u = &odp->umem;
775 
776 		*page_shift = odp->page_shift;
777 		*ncont = ib_umem_odp_num_pages(odp);
778 		*npages = *ncont << (*page_shift - PAGE_SHIFT);
779 		if (order)
780 			*order = ilog2(roundup_pow_of_two(*ncont));
781 	} else {
782 		u = ib_umem_get(udata, start, length, access_flags, 0);
783 		if (IS_ERR(u)) {
784 			mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
785 			return PTR_ERR(u);
786 		}
787 
788 		mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
789 				   page_shift, ncont, order);
790 	}
791 
792 	if (!*npages) {
793 		mlx5_ib_warn(dev, "avoid zero region\n");
794 		ib_umem_release(u);
795 		return -EINVAL;
796 	}
797 
798 	*umem = u;
799 
800 	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
801 		    *npages, *ncont, *order, *page_shift);
802 
803 	return 0;
804 }
805 
806 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
807 {
808 	struct mlx5_ib_umr_context *context =
809 		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);
810 
811 	context->status = wc->status;
812 	complete(&context->done);
813 }
814 
815 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
816 {
817 	context->cqe.done = mlx5_ib_umr_done;
818 	context->status = -1;
819 	init_completion(&context->done);
820 }
821 
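/*
 * Post a UMR work request on the UMR QP and wait for its completion,
 * serialized by the UMR semaphore.
 */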
822 static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
823 				  struct mlx5_umr_wr *umrwr)
824 {
825 	struct umr_common *umrc = &dev->umrc;
826 	const struct ib_send_wr *bad;
827 	int err;
828 	struct mlx5_ib_umr_context umr_context;
829 
830 	mlx5_ib_init_umr_context(&umr_context);
831 	umrwr->wr.wr_cqe = &umr_context.cqe;
832 
833 	down(&umrc->sem);
834 	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
835 	if (err) {
836 		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
837 	} else {
838 		wait_for_completion(&umr_context.done);
839 		if (umr_context.status != IB_WC_SUCCESS) {
840 			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
841 				     umr_context.status);
842 			err = -EFAULT;
843 		}
844 	}
845 	up(&umrc->sem);
846 	return err;
847 }
848 
849 static struct mlx5_ib_mr *alloc_mr_from_cache(
850 				  struct ib_pd *pd, struct ib_umem *umem,
851 				  u64 virt_addr, u64 len, int npages,
852 				  int page_shift, int order, int access_flags)
853 {
854 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
855 	struct mlx5_ib_mr *mr;
856 	int err = 0;
857 	int i;
858 
859 	for (i = 0; i < 1; i++) {
860 		mr = alloc_cached_mr(dev, order);
861 		if (mr)
862 			break;
863 
864 		err = add_keys(dev, order2idx(dev, order), 1);
865 		if (err && err != -EAGAIN) {
866 			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
867 			break;
868 		}
869 	}
870 
871 	if (!mr)
872 		return ERR_PTR(-EAGAIN);
873 
874 	mr->ibmr.pd = pd;
875 	mr->umem = umem;
876 	mr->access_flags = access_flags;
877 	mr->desc_size = sizeof(struct mlx5_mtt);
878 	mr->mmkey.iova = virt_addr;
879 	mr->mmkey.size = len;
880 	mr->mmkey.pd = to_mpd(pd)->pdn;
881 
882 	return mr;
883 }
884 
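/*
 * Fill one XLT chunk, starting at @idx, with MTTs from the umem (or
 * with KLMs when building an indirect mkey).
 */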
885 static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
886 			       void *xlt, int page_shift, size_t size,
887 			       int flags)
888 {
889 	struct mlx5_ib_dev *dev = mr->dev;
890 	struct ib_umem *umem = mr->umem;
891 
892 	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
893 		if (!umr_can_use_indirect_mkey(dev))
894 			return -EPERM;
895 		mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
896 		return npages;
897 	}
898 
899 	npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);
900 
901 	if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
902 		__mlx5_ib_populate_pas(dev, umem, page_shift,
903 				       idx, npages, xlt,
904 				       MLX5_IB_MTT_PRESENT);
905 		/* Clear padding after the pages
906 		 * brought from the umem.
907 		 */
908 		memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
909 		       size - npages * sizeof(struct mlx5_mtt));
910 	}
911 
912 	return npages;
913 }
914 
915 #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
916 			    MLX5_UMR_MTT_ALIGNMENT)
917 #define MLX5_SPARE_UMR_CHUNK 0x10000
918 
919 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
920 		       int page_shift, int flags)
921 {
922 	struct mlx5_ib_dev *dev = mr->dev;
923 	struct device *ddev = dev->ib_dev.dev.parent;
924 	int size;
925 	void *xlt;
926 	dma_addr_t dma;
927 	struct mlx5_umr_wr wr;
928 	struct ib_sge sg;
929 	int err = 0;
930 	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
931 			       ? sizeof(struct mlx5_klm)
932 			       : sizeof(struct mlx5_mtt);
933 	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
934 	const int page_mask = page_align - 1;
935 	size_t pages_mapped = 0;
936 	size_t pages_to_map = 0;
937 	size_t pages_iter = 0;
938 	gfp_t gfp;
939 	bool use_emergency_page = false;
940 
941 	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
942 	    !umr_can_use_indirect_mkey(dev))
943 		return -EPERM;
944 
945 	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
946 	 * so we need to align the offset and length accordingly
947 	 */
948 	if (idx & page_mask) {
949 		npages += idx & page_mask;
950 		idx &= ~page_mask;
951 	}
952 
953 	gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
954 	gfp |= __GFP_ZERO | __GFP_NOWARN;
955 
956 	pages_to_map = ALIGN(npages, page_align);
957 	size = desc_size * pages_to_map;
958 	size = min_t(int, size, MLX5_MAX_UMR_CHUNK);
959 
960 	xlt = (void *)__get_free_pages(gfp, get_order(size));
961 	if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
962 		mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d, falling back to spare UMR allocation of %d bytes\n",
963 			    size, get_order(size), MLX5_SPARE_UMR_CHUNK);
964 
965 		size = MLX5_SPARE_UMR_CHUNK;
966 		xlt = (void *)__get_free_pages(gfp, get_order(size));
967 	}
968 
969 	if (!xlt) {
970 		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
971 		xlt = (void *)mlx5_ib_get_xlt_emergency_page();
972 		size = PAGE_SIZE;
973 		memset(xlt, 0, size);
974 		use_emergency_page = true;
975 	}
976 	pages_iter = size / desc_size;
977 	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
978 	if (dma_mapping_error(ddev, dma)) {
979 		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
980 		err = -ENOMEM;
981 		goto free_xlt;
982 	}
983 
984 	sg.addr = dma;
985 	sg.lkey = dev->umrc.pd->local_dma_lkey;
986 
987 	memset(&wr, 0, sizeof(wr));
988 	wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
989 	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
990 		wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
991 	wr.wr.sg_list = &sg;
992 	wr.wr.num_sge = 1;
993 	wr.wr.opcode = MLX5_IB_WR_UMR;
994 
995 	wr.pd = mr->ibmr.pd;
996 	wr.mkey = mr->mmkey.key;
997 	wr.length = mr->mmkey.size;
998 	wr.virt_addr = mr->mmkey.iova;
999 	wr.access_flags = mr->access_flags;
1000 	wr.page_shift = page_shift;
1001 
1002 	for (pages_mapped = 0;
1003 	     pages_mapped < pages_to_map && !err;
1004 	     pages_mapped += pages_iter, idx += pages_iter) {
1005 		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
1006 		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
1007 		npages = populate_xlt(mr, idx, npages, xlt,
1008 				      page_shift, size, flags);
1009 
1010 		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);
1011 
1012 		sg.length = ALIGN(npages * desc_size,
1013 				  MLX5_UMR_MTT_ALIGNMENT);
1014 
1015 		if (pages_mapped + pages_iter >= pages_to_map) {
1016 			if (flags & MLX5_IB_UPD_XLT_ENABLE)
1017 				wr.wr.send_flags |=
1018 					MLX5_IB_SEND_UMR_ENABLE_MR |
1019 					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
1020 					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1021 			if (flags & MLX5_IB_UPD_XLT_PD ||
1022 			    flags & MLX5_IB_UPD_XLT_ACCESS)
1023 				wr.wr.send_flags |=
1024 					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1025 			if (flags & MLX5_IB_UPD_XLT_ADDR)
1026 				wr.wr.send_flags |=
1027 					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
1028 		}
1029 
1030 		wr.offset = idx * desc_size;
1031 		wr.xlt_size = sg.length;
1032 
1033 		err = mlx5_ib_post_send_wait(dev, &wr);
1034 	}
1035 	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);
1036 
1037 free_xlt:
1038 	if (use_emergency_page)
1039 		mlx5_ib_put_xlt_emergency_page();
1040 	else
1041 		free_pages((unsigned long)xlt, get_order(size));
1042 
1043 	return err;
1044 }
1045 
1046 /*
1047  * If ibmr is NULL, it will be allocated by reg_create.
1048  * Otherwise, the given ibmr will be used.
1049  */
1050 static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd,
1051 				     u64 virt_addr, u64 length,
1052 				     struct ib_umem *umem, int npages,
1053 				     int page_shift, int access_flags,
1054 				     bool populate)
1055 {
1056 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1057 	struct mlx5_ib_mr *mr;
1058 	__be64 *pas;
1059 	void *mkc;
1060 	int inlen;
1061 	u32 *in;
1062 	int err;
1063 	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));
1064 
1065 	mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL);
1066 	if (!mr)
1067 		return ERR_PTR(-ENOMEM);
1068 
1069 	mr->ibmr.pd = pd;
1070 	mr->access_flags = access_flags;
1071 
1072 	inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1073 	if (populate)
1074 		inlen += sizeof(*pas) * roundup(npages, 2);
1075 	in = kvzalloc(inlen, GFP_KERNEL);
1076 	if (!in) {
1077 		err = -ENOMEM;
1078 		goto err_1;
1079 	}
1080 	pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt);
1081 	if (populate && !(access_flags & IB_ACCESS_ON_DEMAND))
1082 		mlx5_ib_populate_pas(dev, umem, page_shift, pas,
1083 				     pg_cap ? MLX5_IB_MTT_PRESENT : 0);
1084 
1085 	/* The pg_access bit allows setting the access flags
1086 	 * in the page list submitted with the command. */
1087 	MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap));
1088 
1089 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1090 	MLX5_SET(mkc, mkc, free, !populate);
1091 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT);
1092 	MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC));
1093 	MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE));
1094 	MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ));
1095 	MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE));
1096 	MLX5_SET(mkc, mkc, lr, 1);
1097 	MLX5_SET(mkc, mkc, umr_en, 1);
1098 
1099 	MLX5_SET64(mkc, mkc, start_addr, virt_addr);
1100 	MLX5_SET64(mkc, mkc, len, length);
1101 	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1102 	MLX5_SET(mkc, mkc, bsf_octword_size, 0);
1103 	MLX5_SET(mkc, mkc, translations_octword_size,
1104 		 get_octo_len(virt_addr, length, page_shift));
1105 	MLX5_SET(mkc, mkc, log_page_size, page_shift);
1106 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1107 	if (populate) {
1108 		MLX5_SET(create_mkey_in, in, translations_octword_actual_size,
1109 			 get_octo_len(virt_addr, length, page_shift));
1110 	}
1111 
1112 	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1113 	if (err) {
1114 		mlx5_ib_warn(dev, "create mkey failed\n");
1115 		goto err_2;
1116 	}
1117 	mr->mmkey.type = MLX5_MKEY_MR;
1118 	mr->desc_size = sizeof(struct mlx5_mtt);
1119 	mr->dev = dev;
1120 	kvfree(in);
1121 
1122 	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key);
1123 
1124 	return mr;
1125 
1126 err_2:
1127 	kvfree(in);
1128 
1129 err_1:
1130 	if (!ibmr)
1131 		kfree(mr);
1132 
1133 	return ERR_PTR(err);
1134 }
1135 
1136 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr,
1137 			  int npages, u64 length, int access_flags)
1138 {
1139 	mr->npages = npages;
1140 	atomic_add(npages, &dev->mdev->priv.reg_pages);
1141 	mr->ibmr.lkey = mr->mmkey.key;
1142 	mr->ibmr.rkey = mr->mmkey.key;
1143 	mr->ibmr.length = length;
1144 	mr->access_flags = access_flags;
1145 }
1146 
1147 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr,
1148 				       u64 length, int acc, int mode)
1149 {
1150 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1151 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1152 	struct mlx5_core_dev *mdev = dev->mdev;
1153 	struct mlx5_ib_mr *mr;
1154 	void *mkc;
1155 	u32 *in;
1156 	int err;
1157 
1158 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1159 	if (!mr)
1160 		return ERR_PTR(-ENOMEM);
1161 
1162 	in = kzalloc(inlen, GFP_KERNEL);
1163 	if (!in) {
1164 		err = -ENOMEM;
1165 		goto err_free;
1166 	}
1167 
1168 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1169 
1170 	MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3);
1171 	MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7);
1172 	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
1173 	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
1174 	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
1175 	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
1176 	MLX5_SET(mkc, mkc, lr, 1);
1177 
1178 	MLX5_SET64(mkc, mkc, len, length);
1179 	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1180 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1181 	MLX5_SET64(mkc, mkc, start_addr, start_addr);
1182 
1183 	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
1184 	if (err)
1185 		goto err_in;
1186 
1187 	kfree(in);
1188 
1189 	mr->umem = NULL;
1190 	set_mr_fields(dev, mr, 0, length, acc);
1191 
1192 	return &mr->ibmr;
1193 
1194 err_in:
1195 	kfree(in);
1196 
1197 err_free:
1198 	kfree(mr);
1199 
1200 	return ERR_PTR(err);
1201 }
1202 
1203 int mlx5_ib_advise_mr(struct ib_pd *pd,
1204 		      enum ib_uverbs_advise_mr_advice advice,
1205 		      u32 flags,
1206 		      struct ib_sge *sg_list,
1207 		      u32 num_sge,
1208 		      struct uverbs_attr_bundle *attrs)
1209 {
1210 	if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH &&
1211 	    advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE)
1212 		return -EOPNOTSUPP;
1213 
1214 	return mlx5_ib_advise_mr_prefetch(pd, advice, flags,
1215 					 sg_list, num_sge);
1216 }
1217 
1218 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm,
1219 				struct ib_dm_mr_attr *attr,
1220 				struct uverbs_attr_bundle *attrs)
1221 {
1222 	struct mlx5_ib_dm *mdm = to_mdm(dm);
1223 	struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev;
1224 	u64 start_addr = mdm->dev_addr + attr->offset;
1225 	int mode;
1226 
1227 	switch (mdm->type) {
1228 	case MLX5_IB_UAPI_DM_TYPE_MEMIC:
1229 		if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS)
1230 			return ERR_PTR(-EINVAL);
1231 
1232 		mode = MLX5_MKC_ACCESS_MODE_MEMIC;
1233 		start_addr -= pci_resource_start(dev->pdev, 0);
1234 		break;
1235 	case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM:
1236 	case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM:
1237 		if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS)
1238 			return ERR_PTR(-EINVAL);
1239 
1240 		mode = MLX5_MKC_ACCESS_MODE_SW_ICM;
1241 		break;
1242 	default:
1243 		return ERR_PTR(-EINVAL);
1244 	}
1245 
1246 	return mlx5_ib_get_dm_mr(pd, start_addr, attr->length,
1247 				 attr->access_flags, mode);
1248 }
1249 
1250 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
1251 				  u64 virt_addr, int access_flags,
1252 				  struct ib_udata *udata)
1253 {
1254 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1255 	struct mlx5_ib_mr *mr = NULL;
1256 	bool use_umr;
1257 	struct ib_umem *umem;
1258 	int page_shift;
1259 	int npages;
1260 	int ncont;
1261 	int order;
1262 	int err;
1263 
1264 	if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM))
1265 		return ERR_PTR(-EOPNOTSUPP);
1266 
1267 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1268 		    start, virt_addr, length, access_flags);
1269 
1270 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start &&
1271 	    length == U64_MAX) {
1272 		if (!(access_flags & IB_ACCESS_ON_DEMAND) ||
1273 		    !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT))
1274 			return ERR_PTR(-EINVAL);
1275 
1276 		mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags);
1277 		if (IS_ERR(mr))
1278 			return ERR_CAST(mr);
1279 		return &mr->ibmr;
1280 	}
1281 
1282 	err = mr_umem_get(dev, udata, start, length, access_flags, &umem,
1283 			  &npages, &page_shift, &ncont, &order);
1284 
1285 	if (err < 0)
1286 		return ERR_PTR(err);
1287 
1288 	use_umr = mlx5_ib_can_use_umr(dev, true);
1289 
1290 	if (order <= mr_cache_max_order(dev) && use_umr) {
1291 		mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont,
1292 					 page_shift, order, access_flags);
1293 		if (PTR_ERR(mr) == -EAGAIN) {
1294 			mlx5_ib_dbg(dev, "cache empty for order %d\n", order);
1295 			mr = NULL;
1296 		}
1297 	} else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) {
1298 		if (access_flags & IB_ACCESS_ON_DEMAND) {
1299 			err = -EINVAL;
1300 			pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n");
1301 			goto error;
1302 		}
1303 		use_umr = false;
1304 	}
1305 
1306 	if (!mr) {
1307 		mutex_lock(&dev->slow_path_mutex);
1308 		mr = reg_create(NULL, pd, virt_addr, length, umem, ncont,
1309 				page_shift, access_flags, !use_umr);
1310 		mutex_unlock(&dev->slow_path_mutex);
1311 	}
1312 
1313 	if (IS_ERR(mr)) {
1314 		err = PTR_ERR(mr);
1315 		goto error;
1316 	}
1317 
1318 	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key);
1319 
1320 	mr->umem = umem;
1321 	set_mr_fields(dev, mr, npages, length, access_flags);
1322 
1323 	if (use_umr) {
1324 		int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE;
1325 
1326 		if (access_flags & IB_ACCESS_ON_DEMAND)
1327 			update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP;
1328 
1329 		err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift,
1330 					 update_xlt_flags);
1331 
1332 		if (err) {
1333 			dereg_mr(dev, mr);
1334 			return ERR_PTR(err);
1335 		}
1336 	}
1337 
1338 	if (is_odp_mr(mr)) {
1339 		to_ib_umem_odp(mr->umem)->private = mr;
1340 		atomic_set(&mr->num_pending_prefetch, 0);
1341 	}
1342 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
1343 		smp_store_release(&mr->live, 1);
1344 
1345 	return &mr->ibmr;
1346 error:
1347 	ib_umem_release(umem);
1348 	return ERR_PTR(err);
1349 }
1350 
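/*
 * Post a UMR WQE that disables the mkey, so that the MR can be returned
 * to the cache and the mkey reused.
 */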
1351 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1352 {
1353 	struct mlx5_core_dev *mdev = dev->mdev;
1354 	struct mlx5_umr_wr umrwr = {};
1355 
1356 	if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
1357 		return 0;
1358 
1359 	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR |
1360 			      MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1361 	umrwr.wr.opcode = MLX5_IB_WR_UMR;
1362 	umrwr.pd = dev->umrc.pd;
1363 	umrwr.mkey = mr->mmkey.key;
1364 	umrwr.ignore_free_state = 1;
1365 
1366 	return mlx5_ib_post_send_wait(dev, &umrwr);
1367 }
1368 
1369 static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1370 		     int access_flags, int flags)
1371 {
1372 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1373 	struct mlx5_umr_wr umrwr = {};
1374 	int err;
1375 
1376 	umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE;
1377 
1378 	umrwr.wr.opcode = MLX5_IB_WR_UMR;
1379 	umrwr.mkey = mr->mmkey.key;
1380 
1381 	if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) {
1382 		umrwr.pd = pd;
1383 		umrwr.access_flags = access_flags;
1384 		umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
1385 	}
1386 
1387 	err = mlx5_ib_post_send_wait(dev, &umrwr);
1388 
1389 	return err;
1390 }
1391 
1392 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start,
1393 			  u64 length, u64 virt_addr, int new_access_flags,
1394 			  struct ib_pd *new_pd, struct ib_udata *udata)
1395 {
1396 	struct mlx5_ib_dev *dev = to_mdev(ib_mr->device);
1397 	struct mlx5_ib_mr *mr = to_mmr(ib_mr);
1398 	struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd;
1399 	int access_flags = flags & IB_MR_REREG_ACCESS ?
1400 			    new_access_flags :
1401 			    mr->access_flags;
1402 	int page_shift = 0;
1403 	int upd_flags = 0;
1404 	int npages = 0;
1405 	int ncont = 0;
1406 	int order = 0;
1407 	u64 addr, len;
1408 	int err;
1409 
1410 	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
1411 		    start, virt_addr, length, access_flags);
1412 
1413 	atomic_sub(mr->npages, &dev->mdev->priv.reg_pages);
1414 
1415 	if (!mr->umem)
1416 		return -EINVAL;
1417 
1418 	if (is_odp_mr(mr))
1419 		return -EOPNOTSUPP;
1420 
1421 	if (flags & IB_MR_REREG_TRANS) {
1422 		addr = virt_addr;
1423 		len = length;
1424 	} else {
1425 		addr = mr->umem->address;
1426 		len = mr->umem->length;
1427 	}
1428 
1429 	if (flags != IB_MR_REREG_PD) {
1430 		/*
1431 		 * Replace umem. This needs to be done whether or not UMR is
1432 		 * used.
1433 		 */
1434 		flags |= IB_MR_REREG_TRANS;
1435 		ib_umem_release(mr->umem);
1436 		mr->umem = NULL;
1437 		err = mr_umem_get(dev, udata, addr, len, access_flags,
1438 				  &mr->umem, &npages, &page_shift, &ncont,
1439 				  &order);
1440 		if (err)
1441 			goto err;
1442 	}
1443 
1444 	if (!mlx5_ib_can_use_umr(dev, true) ||
1445 	    (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) {
1446 		/*
1447 		 * UMR can't be used - MKey needs to be replaced.
1448 		 */
1449 		if (mr->allocated_from_cache)
1450 			err = unreg_umr(dev, mr);
1451 		else
1452 			err = destroy_mkey(dev, mr);
1453 		if (err)
1454 			goto err;
1455 
1456 		mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont,
1457 				page_shift, access_flags, true);
1458 
1459 		if (IS_ERR(mr)) {
1460 			err = PTR_ERR(mr);
1461 			mr = to_mmr(ib_mr);
1462 			goto err;
1463 		}
1464 
1465 		mr->allocated_from_cache = 0;
1466 	} else {
1467 		/*
1468 		 * Send a UMR WQE
1469 		 */
1470 		mr->ibmr.pd = pd;
1471 		mr->access_flags = access_flags;
1472 		mr->mmkey.iova = addr;
1473 		mr->mmkey.size = len;
1474 		mr->mmkey.pd = to_mpd(pd)->pdn;
1475 
1476 		if (flags & IB_MR_REREG_TRANS) {
1477 			upd_flags = MLX5_IB_UPD_XLT_ADDR;
1478 			if (flags & IB_MR_REREG_PD)
1479 				upd_flags |= MLX5_IB_UPD_XLT_PD;
1480 			if (flags & IB_MR_REREG_ACCESS)
1481 				upd_flags |= MLX5_IB_UPD_XLT_ACCESS;
1482 			err = mlx5_ib_update_xlt(mr, 0, npages, page_shift,
1483 						 upd_flags);
1484 		} else {
1485 			err = rereg_umr(pd, mr, access_flags, flags);
1486 		}
1487 
1488 		if (err)
1489 			goto err;
1490 	}
1491 
1492 	set_mr_fields(dev, mr, npages, len, access_flags);
1493 
1494 	return 0;
1495 
1496 err:
1497 	ib_umem_release(mr->umem);
1498 	mr->umem = NULL;
1499 
1500 	clean_mr(dev, mr);
1501 	return err;
1502 }
1503 
1504 static int
1505 mlx5_alloc_priv_descs(struct ib_device *device,
1506 		      struct mlx5_ib_mr *mr,
1507 		      int ndescs,
1508 		      int desc_size)
1509 {
1510 	int size = ndescs * desc_size;
1511 	int add_size;
1512 	int ret;
1513 
1514 	add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);
1515 
1516 	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
1517 	if (!mr->descs_alloc)
1518 		return -ENOMEM;
1519 
1520 	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);
1521 
1522 	mr->desc_map = dma_map_single(device->dev.parent, mr->descs,
1523 				      size, DMA_TO_DEVICE);
1524 	if (dma_mapping_error(device->dev.parent, mr->desc_map)) {
1525 		ret = -ENOMEM;
1526 		goto err;
1527 	}
1528 
1529 	return 0;
1530 err:
1531 	kfree(mr->descs_alloc);
1532 
1533 	return ret;
1534 }
1535 
1536 static void
1537 mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
1538 {
1539 	if (mr->descs) {
1540 		struct ib_device *device = mr->ibmr.device;
1541 		int size = mr->max_descs * mr->desc_size;
1542 
1543 		dma_unmap_single(device->dev.parent, mr->desc_map,
1544 				 size, DMA_TO_DEVICE);
1545 		kfree(mr->descs_alloc);
1546 		mr->descs = NULL;
1547 	}
1548 }
1549 
1550 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1551 {
1552 	int allocated_from_cache = mr->allocated_from_cache;
1553 
1554 	if (mr->sig) {
1555 		if (mlx5_core_destroy_psv(dev->mdev,
1556 					  mr->sig->psv_memory.psv_idx))
1557 			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1558 				     mr->sig->psv_memory.psv_idx);
1559 		if (mlx5_core_destroy_psv(dev->mdev,
1560 					  mr->sig->psv_wire.psv_idx))
1561 			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1562 				     mr->sig->psv_wire.psv_idx);
1563 		kfree(mr->sig);
1564 		mr->sig = NULL;
1565 	}
1566 
1567 	if (!allocated_from_cache) {
1568 		destroy_mkey(dev, mr);
1569 		mlx5_free_priv_descs(mr);
1570 	}
1571 }
1572 
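/*
 * Tear down an MR: quiesce ODP page faults and prefetches if needed,
 * destroy or cache the mkey, and release the umem.
 */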
1573 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
1574 {
1575 	int npages = mr->npages;
1576 	struct ib_umem *umem = mr->umem;
1577 
1578 	if (is_odp_mr(mr)) {
1579 		struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem);
1580 
1581 		/* Prevent new page faults and
1582 		 * prefetch requests from succeeding
1583 		 */
1584 		WRITE_ONCE(mr->live, 0);
1585 
1586 		/* Wait for all running page-fault handlers to finish. */
1587 		synchronize_srcu(&dev->mr_srcu);
1588 
1589 		/* dequeue pending prefetch requests for the mr */
1590 		if (atomic_read(&mr->num_pending_prefetch))
1591 			flush_workqueue(system_unbound_wq);
1592 		WARN_ON(atomic_read(&mr->num_pending_prefetch));
1593 
1594 		/* Destroy all page mappings */
1595 		if (!umem_odp->is_implicit_odp)
1596 			mlx5_ib_invalidate_range(umem_odp,
1597 						 ib_umem_start(umem_odp),
1598 						 ib_umem_end(umem_odp));
1599 		else
1600 			mlx5_ib_free_implicit_mr(mr);
1601 		/*
1602 		 * For ODP, release the umem before the MR so that no
1603 		 * invalidations remain in flight that could still be
1604 		 * looking at the *mr struct.
1605 		 */
1606 		ib_umem_odp_release(umem_odp);
1607 		atomic_sub(npages, &dev->mdev->priv.reg_pages);
1608 
1609 		/* Avoid double-freeing the umem. */
1610 		umem = NULL;
1611 	}
1612 
1613 	clean_mr(dev, mr);
1614 
1615 	/*
1616 	 * We should unregister the DMA address from the HCA before
1617 	 * removing the DMA mapping.
1618 	 */
1619 	mlx5_mr_cache_free(dev, mr);
1620 	ib_umem_release(umem);
1621 	if (umem)
1622 		atomic_sub(npages, &dev->mdev->priv.reg_pages);
1623 
1624 	if (!mr->allocated_from_cache)
1625 		kfree(mr);
1626 }
1627 
1628 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
1629 {
1630 	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1631 
1632 	if (ibmr->type == IB_MR_TYPE_INTEGRITY) {
1633 		dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr);
1634 		dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr);
1635 	}
1636 
1637 	dereg_mr(to_mdev(ibmr->device), mmr);
1638 
1639 	return 0;
1640 }
1641 
1642 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs,
1643 				   int access_mode, int page_shift)
1644 {
1645 	void *mkc;
1646 
1647 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1648 
1649 	MLX5_SET(mkc, mkc, free, 1);
1650 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1651 	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1652 	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1653 	MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3);
1654 	MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7);
1655 	MLX5_SET(mkc, mkc, umr_en, 1);
1656 	MLX5_SET(mkc, mkc, log_page_size, page_shift);
1657 }
1658 
1659 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1660 				  int ndescs, int desc_size, int page_shift,
1661 				  int access_mode, u32 *in, int inlen)
1662 {
1663 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1664 	int err;
1665 
1666 	mr->access_mode = access_mode;
1667 	mr->desc_size = desc_size;
1668 	mr->max_descs = ndescs;
1669 
1670 	err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size);
1671 	if (err)
1672 		return err;
1673 
1674 	mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift);
1675 
1676 	err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen);
1677 	if (err)
1678 		goto err_free_descs;
1679 
1680 	mr->mmkey.type = MLX5_MKEY_MR;
1681 	mr->ibmr.lkey = mr->mmkey.key;
1682 	mr->ibmr.rkey = mr->mmkey.key;
1683 
1684 	return 0;
1685 
1686 err_free_descs:
1687 	mlx5_free_priv_descs(mr);
1688 	return err;
1689 }
1690 
1691 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd,
1692 				u32 max_num_sg, u32 max_num_meta_sg,
1693 				int desc_size, int access_mode)
1694 {
1695 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1696 	int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4);
1697 	int page_shift = 0;
1698 	struct mlx5_ib_mr *mr;
1699 	u32 *in;
1700 	int err;
1701 
1702 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1703 	if (!mr)
1704 		return ERR_PTR(-ENOMEM);
1705 
1706 	mr->ibmr.pd = pd;
1707 	mr->ibmr.device = pd->device;
1708 
1709 	in = kzalloc(inlen, GFP_KERNEL);
1710 	if (!in) {
1711 		err = -ENOMEM;
1712 		goto err_free;
1713 	}
1714 
1715 	if (access_mode == MLX5_MKC_ACCESS_MODE_MTT)
1716 		page_shift = PAGE_SHIFT;
1717 
1718 	err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift,
1719 				     access_mode, in, inlen);
1720 	if (err)
1721 		goto err_free_in;
1722 
1723 	mr->umem = NULL;
1724 	kfree(in);
1725 
1726 	return mr;
1727 
1728 err_free_in:
1729 	kfree(in);
1730 err_free:
1731 	kfree(mr);
1732 	return ERR_PTR(err);
1733 }
1734 
1735 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1736 				    int ndescs, u32 *in, int inlen)
1737 {
1738 	return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt),
1739 				      PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in,
1740 				      inlen);
1741 }
1742 
1743 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1744 				    int ndescs, u32 *in, int inlen)
1745 {
1746 	return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm),
1747 				      0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1748 }
1749 
1750 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr,
1751 				      int max_num_sg, int max_num_meta_sg,
1752 				      u32 *in, int inlen)
1753 {
1754 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1755 	u32 psv_index[2];
1756 	void *mkc;
1757 	int err;
1758 
1759 	mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
1760 	if (!mr->sig)
1761 		return -ENOMEM;
1762 
1763 	/* create mem & wire PSVs */
1764 	err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index);
1765 	if (err)
1766 		goto err_free_sig;
1767 
1768 	mr->sig->psv_memory.psv_idx = psv_index[0];
1769 	mr->sig->psv_wire.psv_idx = psv_index[1];
1770 
1771 	mr->sig->sig_status_checked = true;
1772 	mr->sig->sig_err_exists = false;
1773 	/* Next UMR, Arm SIGERR */
1774 	++mr->sig->sigerr_count;
1775 	mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1776 					 sizeof(struct mlx5_klm),
1777 					 MLX5_MKC_ACCESS_MODE_KLMS);
1778 	if (IS_ERR(mr->klm_mr)) {
1779 		err = PTR_ERR(mr->klm_mr);
1780 		goto err_destroy_psv;
1781 	}
1782 	mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg,
1783 					 sizeof(struct mlx5_mtt),
1784 					 MLX5_MKC_ACCESS_MODE_MTT);
1785 	if (IS_ERR(mr->mtt_mr)) {
1786 		err = PTR_ERR(mr->mtt_mr);
1787 		goto err_free_klm_mr;
1788 	}
1789 
1790 	/* Set bsf descriptors for mkey */
1791 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1792 	MLX5_SET(mkc, mkc, bsf_en, 1);
1793 	MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE);
1794 
1795 	err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0,
1796 				     MLX5_MKC_ACCESS_MODE_KLMS, in, inlen);
1797 	if (err)
1798 		goto err_free_mtt_mr;
1799 
1800 	return 0;
1801 
1802 err_free_mtt_mr:
1803 	dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr);
1804 	mr->mtt_mr = NULL;
1805 err_free_klm_mr:
1806 	dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr);
1807 	mr->klm_mr = NULL;
1808 err_destroy_psv:
1809 	if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx))
1810 		mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
1811 			     mr->sig->psv_memory.psv_idx);
1812 	if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx))
1813 		mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
1814 			     mr->sig->psv_wire.psv_idx);
1815 err_free_sig:
1816 	kfree(mr->sig);
1817 
1818 	return err;
1819 }
1820 
1821 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd,
1822 					enum ib_mr_type mr_type, u32 max_num_sg,
1823 					u32 max_num_meta_sg)
1824 {
1825 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1826 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1827 	int ndescs = ALIGN(max_num_sg, 4);
1828 	struct mlx5_ib_mr *mr;
1829 	u32 *in;
1830 	int err;
1831 
1832 	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
1833 	if (!mr)
1834 		return ERR_PTR(-ENOMEM);
1835 
1836 	in = kzalloc(inlen, GFP_KERNEL);
1837 	if (!in) {
1838 		err = -ENOMEM;
1839 		goto err_free;
1840 	}
1841 
1842 	mr->ibmr.device = pd->device;
1843 	mr->umem = NULL;
1844 
1845 	switch (mr_type) {
1846 	case IB_MR_TYPE_MEM_REG:
1847 		err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen);
1848 		break;
1849 	case IB_MR_TYPE_SG_GAPS:
1850 		err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen);
1851 		break;
1852 	case IB_MR_TYPE_INTEGRITY:
1853 		err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg,
1854 						 max_num_meta_sg, in, inlen);
1855 		break;
1856 	default:
1857 		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
1858 		err = -EINVAL;
1859 	}
1860 
1861 	if (err)
1862 		goto err_free_in;
1863 
1864 	kfree(in);
1865 
1866 	return &mr->ibmr;
1867 
1868 err_free_in:
1869 	kfree(in);
1870 err_free:
1871 	kfree(mr);
1872 	return ERR_PTR(err);
1873 }
1874 
1875 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type,
1876 			       u32 max_num_sg, struct ib_udata *udata)
1877 {
1878 	return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0);
1879 }
1880 
1881 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd,
1882 					 u32 max_num_sg, u32 max_num_meta_sg)
1883 {
1884 	return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg,
1885 				  max_num_meta_sg);
1886 }
1887 
1888 struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type,
1889 			       struct ib_udata *udata)
1890 {
1891 	struct mlx5_ib_dev *dev = to_mdev(pd->device);
1892 	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
1893 	struct mlx5_ib_mw *mw = NULL;
1894 	u32 *in = NULL;
1895 	void *mkc;
1896 	int ndescs;
1897 	int err;
1898 	struct mlx5_ib_alloc_mw req = {};
1899 	struct {
1900 		__u32	comp_mask;
1901 		__u32	response_length;
1902 	} resp = {};
1903 
1904 	err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req)));
1905 	if (err)
1906 		return ERR_PTR(err);
1907 
1908 	if (req.comp_mask || req.reserved1 || req.reserved2)
1909 		return ERR_PTR(-EOPNOTSUPP);
1910 
1911 	if (udata->inlen > sizeof(req) &&
1912 	    !ib_is_udata_cleared(udata, sizeof(req),
1913 				 udata->inlen - sizeof(req)))
1914 		return ERR_PTR(-EOPNOTSUPP);
1915 
1916 	ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4);
1917 
1918 	mw = kzalloc(sizeof(*mw), GFP_KERNEL);
1919 	in = kzalloc(inlen, GFP_KERNEL);
1920 	if (!mw || !in) {
1921 		err = -ENOMEM;
1922 		goto free;
1923 	}
1924 
1925 	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
1926 
1927 	MLX5_SET(mkc, mkc, free, 1);
1928 	MLX5_SET(mkc, mkc, translations_octword_size, ndescs);
1929 	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
1930 	MLX5_SET(mkc, mkc, umr_en, 1);
1931 	MLX5_SET(mkc, mkc, lr, 1);
1932 	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS);
1933 	MLX5_SET(mkc, mkc, en_rinval, type == IB_MW_TYPE_2);

1934 	MLX5_SET(mkc, mkc, qpn, 0xffffff);
1935 
1936 	err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen);
1937 	if (err)
1938 		goto free;
1939 
1940 	mw->mmkey.type = MLX5_MKEY_MW;
1941 	mw->ibmw.rkey = mw->mmkey.key;
1942 	mw->ndescs = ndescs;
1943 
1944 	resp.response_length = min(offsetof(typeof(resp), response_length) +
1945 				   sizeof(resp.response_length), udata->outlen);
1946 	if (resp.response_length) {
1947 		err = ib_copy_to_udata(udata, &resp, resp.response_length);
1948 		if (err) {
1949 			mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey);
1950 			goto free;
1951 		}
1952 	}
1953 
1954 	kfree(in);
1955 	return &mw->ibmw;
1956 
1957 free:
1958 	kfree(mw);
1959 	kfree(in);
1960 	return ERR_PTR(err);
1961 }
1962 
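/*
 * Memory window teardown. With ODP enabled the mkey is first removed from
 * the mkey_table and an SRCU grace period is observed, so that a page fault
 * handler still dereferencing this MW (see the comment below) cannot race
 * with the mkey destruction and the kfree().
 */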
1963 int mlx5_ib_dealloc_mw(struct ib_mw *mw)
1964 {
1965 	struct mlx5_ib_dev *dev = to_mdev(mw->device);
1966 	struct mlx5_ib_mw *mmw = to_mmw(mw);
1967 	int err;
1968 
1969 	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) {
1970 		xa_erase_irq(&dev->mdev->priv.mkey_table,
1971 			     mlx5_base_mkey(mmw->mmkey.key));
1972 		/*
1973 		 * pagefault_single_data_segment() may be accessing mmw under
1974 		 * SRCU if the user bound an ODP MR to this MW.
1975 		 */
1976 		synchronize_srcu(&dev->mr_srcu);
1977 	}
1978 
1979 	err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey);
1980 	if (err)
1981 		return err;
1982 	kfree(mmw);
1983 	return 0;
1984 }
1985 
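/*
 * ib_check_mr_status() backend for signature MRs: reports, and then clears,
 * a pending signature error that the driver recorded in mr->sig. A consumer
 * would typically call it after an I/O on a signature-enabled MR; a minimal
 * sketch (error handling elided, surrounding variables assumed):
 *
 *	struct ib_mr_status st;
 *
 *	if (!ib_check_mr_status(mr, IB_MR_CHECK_SIG_STATUS, &st) &&
 *	    (st.fail_status & IB_MR_CHECK_SIG_STATUS))
 *		pr_warn("sig error type %d at offset %llu\n",
 *			st.sig_err.err_type, st.sig_err.sig_err_offset);
 */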
1986 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
1987 			    struct ib_mr_status *mr_status)
1988 {
1989 	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
1990 	int ret = 0;
1991 
1992 	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
1993 		pr_err("Invalid status check mask\n");
1994 		ret = -EINVAL;
1995 		goto done;
1996 	}
1997 
1998 	mr_status->fail_status = 0;
1999 	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
2000 		if (!mmr->sig) {
2001 			ret = -EINVAL;
2002 			pr_err("signature status check requested on a non-signature enabled MR\n");
2003 			goto done;
2004 		}
2005 
2006 		mmr->sig->sig_status_checked = true;
2007 		if (!mmr->sig->sig_err_exists)
2008 			goto done;
2009 
2010 		if (ibmr->lkey == mmr->sig->err_item.key) {
2011 			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
2012 			       sizeof(mr_status->sig_err));
2013 		} else {
2014 			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
2015 			mr_status->sig_err.sig_err_offset = 0;
2016 			mr_status->sig_err.key = mmr->sig->err_item.key;
2017 		}
2018 
2019 		mmr->sig->sig_err_exists = false;
2020 		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
2021 	}
2022 
2023 done:
2024 	return ret;
2025 }
2026 
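/*
 * Fast path for map_mr_sg_pi: when both the data and metadata scatterlists
 * are a single DMA-contiguous entry, no MTT/KLM mapping is needed and the
 * region can be described directly with PA descriptors using the local DMA
 * lkey. Returns the number of mapped entries (at most 2); the caller falls
 * back to MTT/KLM mapping if that does not cover all entries.
 */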
2027 static int
2028 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2029 			int data_sg_nents, unsigned int *data_sg_offset,
2030 			struct scatterlist *meta_sg, int meta_sg_nents,
2031 			unsigned int *meta_sg_offset)
2032 {
2033 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2034 	unsigned int sg_offset = 0;
2035 	int n = 0;
2036 
2037 	mr->meta_length = 0;
2038 	if (data_sg_nents == 1) {
2039 		n++;
2040 		mr->ndescs = 1;
2041 		if (data_sg_offset)
2042 			sg_offset = *data_sg_offset;
2043 		mr->data_length = sg_dma_len(data_sg) - sg_offset;
2044 		mr->data_iova = sg_dma_address(data_sg) + sg_offset;
2045 		if (meta_sg_nents == 1) {
2046 			n++;
2047 			mr->meta_ndescs = 1;
2048 			if (meta_sg_offset)
2049 				sg_offset = *meta_sg_offset;
2050 			else
2051 				sg_offset = 0;
2052 			mr->meta_length = sg_dma_len(meta_sg) - sg_offset;
2053 			mr->pi_iova = sg_dma_address(meta_sg) + sg_offset;
2054 		}
2055 		ibmr->length = mr->data_length + mr->meta_length;
2056 	}
2057 
2058 	return n;
2059 }
2060 
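/*
 * Translate a data scatterlist (and optionally a metadata scatterlist) into
 * KLM descriptors. Each KLM carries the DMA address, byte count and the PD's
 * local_dma_lkey, so entries may have arbitrary lengths and gaps. A
 * caller-supplied sg_offset is consumed by the first entry of each list.
 * Returns the total number of descriptors written, capped at mr->max_descs.
 */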
2061 static int
2062 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
2063 		   struct scatterlist *sgl,
2064 		   unsigned short sg_nents,
2065 		   unsigned int *sg_offset_p,
2066 		   struct scatterlist *meta_sgl,
2067 		   unsigned short meta_sg_nents,
2068 		   unsigned int *meta_sg_offset_p)
2069 {
2070 	struct scatterlist *sg = sgl;
2071 	struct mlx5_klm *klms = mr->descs;
2072 	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2073 	u32 lkey = mr->ibmr.pd->local_dma_lkey;
2074 	int i, j = 0;
2075 
2076 	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
2077 	mr->ibmr.length = 0;
2078 
2079 	for_each_sg(sgl, sg, sg_nents, i) {
2080 		if (unlikely(i >= mr->max_descs))
2081 			break;
2082 		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
2083 		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
2084 		klms[i].key = cpu_to_be32(lkey);
2085 		mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2086 
2087 		sg_offset = 0;
2088 	}
2089 
2090 	if (sg_offset_p)
2091 		*sg_offset_p = sg_offset;
2092 
2093 	mr->ndescs = i;
2094 	mr->data_length = mr->ibmr.length;
2095 
2096 	if (meta_sg_nents) {
2097 		sg = meta_sgl;
2098 		sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
2099 		for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
2100 			if (unlikely(i + j >= mr->max_descs))
2101 				break;
2102 			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
2103 						     sg_offset);
2104 			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
2105 							 sg_offset);
2106 			klms[i + j].key = cpu_to_be32(lkey);
2107 			mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2108 
2109 			sg_offset = 0;
2110 		}
2111 		if (meta_sg_offset_p)
2112 			*meta_sg_offset_p = sg_offset;
2113 
2114 		mr->meta_ndescs = j;
2115 		mr->meta_length = mr->ibmr.length - mr->data_length;
2116 	}
2117 
2118 	return i + j;
2119 }
2120 
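/*
 * ib_sg_to_pages() callback: append one page address to the MTT descriptor
 * list, with both local read and write enabled. Fails with -ENOMEM once the
 * descriptor array is full.
 */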
2121 static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
2122 {
2123 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2124 	__be64 *descs;
2125 
2126 	if (unlikely(mr->ndescs == mr->max_descs))
2127 		return -ENOMEM;
2128 
2129 	descs = mr->descs;
2130 	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2131 
2132 	return 0;
2133 }
2134 
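/*
 * Same as mlx5_set_page(), but for the metadata (PI) pages of an integrity
 * MR: descriptors are appended after the data descriptors and accounted in
 * meta_ndescs, sharing the same max_descs budget.
 */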
2135 static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
2136 {
2137 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2138 	__be64 *descs;
2139 
2140 	if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
2141 		return -ENOMEM;
2142 
2143 	descs = mr->descs;
2144 	descs[mr->ndescs + mr->meta_ndescs++] =
2145 		cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2146 
2147 	return 0;
2148 }
2149 
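/*
 * Map data + metadata onto the internal MTT MR of an integrity MR. The data
 * pages are laid out first and the metadata pages directly after them, so
 * the gap between the end of the data and the start of the metadata may be
 * mapped as well (see the comment in the body). Returns the number of mapped
 * SG entries; the caller treats anything short of
 * data_sg_nents + meta_sg_nents as a failure and falls back to KLM.
 */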
2150 static int
2151 mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2152 			 int data_sg_nents, unsigned int *data_sg_offset,
2153 			 struct scatterlist *meta_sg, int meta_sg_nents,
2154 			 unsigned int *meta_sg_offset)
2155 {
2156 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2157 	struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
2158 	int n;
2159 
2160 	pi_mr->ndescs = 0;
2161 	pi_mr->meta_ndescs = 0;
2162 	pi_mr->meta_length = 0;
2163 
2164 	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2165 				   pi_mr->desc_size * pi_mr->max_descs,
2166 				   DMA_TO_DEVICE);
2167 
2168 	pi_mr->ibmr.page_size = ibmr->page_size;
2169 	n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
2170 			   mlx5_set_page);
2171 	if (n != data_sg_nents)
2172 		return n;
2173 
2174 	pi_mr->data_iova = pi_mr->ibmr.iova;
2175 	pi_mr->data_length = pi_mr->ibmr.length;
2176 	pi_mr->ibmr.length = pi_mr->data_length;
2177 	ibmr->length = pi_mr->data_length;
2178 
2179 	if (meta_sg_nents) {
2180 		u64 page_mask = ~((u64)ibmr->page_size - 1);
2181 		u64 iova = pi_mr->data_iova;
2182 
2183 		n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
2184 				    meta_sg_offset, mlx5_set_page_pi);
2185 
2186 		pi_mr->meta_length = pi_mr->ibmr.length;
2187 		/*
2188 		 * The PI address for the HW is where the metadata lands in this
2189 		 * MR's virtual space, directly after the data pages:
2190 		 * first data page address + size of the data pages +
2191 		 * metadata offset within the first metadata page.
2192 		 */
2193 		pi_mr->pi_iova = (iova & page_mask) +
2194 				 pi_mr->ndescs * ibmr->page_size +
2195 				 (pi_mr->ibmr.iova & ~page_mask);
2196 		/*
2197 		 * In order to use one MTT MR for both data and metadata, we
2198 		 * also register the gap between the end of the data and the
2199 		 * start of the metadata (the sig MR verifies that the HW
2200 		 * accesses the right addresses). This mapping is safe because
2201 		 * we use an internal mkey for the registration.
2202 		 */
2203 		pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
2204 		pi_mr->ibmr.iova = iova;
2205 		ibmr->length += pi_mr->meta_length;
2206 	}
2207 
2208 	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2209 				      pi_mr->desc_size * pi_mr->max_descs,
2210 				      DMA_TO_DEVICE);
2211 
2212 	return n;
2213 }
2214 
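/*
 * KLM fallback for map_mr_sg_pi: map data + metadata onto the internal KLM
 * MR as a zero-based region, with the metadata placed immediately after the
 * data (pi_iova == data_length). Used only when the PA and MTT mappings
 * cannot describe the scatterlists.
 */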
2215 static int
2216 mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2217 			 int data_sg_nents, unsigned int *data_sg_offset,
2218 			 struct scatterlist *meta_sg, int meta_sg_nents,
2219 			 unsigned int *meta_sg_offset)
2220 {
2221 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2222 	struct mlx5_ib_mr *pi_mr = mr->klm_mr;
2223 	int n;
2224 
2225 	pi_mr->ndescs = 0;
2226 	pi_mr->meta_ndescs = 0;
2227 	pi_mr->meta_length = 0;
2228 
2229 	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2230 				   pi_mr->desc_size * pi_mr->max_descs,
2231 				   DMA_TO_DEVICE);
2232 
2233 	n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
2234 			       meta_sg, meta_sg_nents, meta_sg_offset);
2235 
2236 	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2237 				      pi_mr->desc_size * pi_mr->max_descs,
2238 				      DMA_TO_DEVICE);
2239 
2240 	/* This is a zero-based memory region */
2241 	pi_mr->data_iova = 0;
2242 	pi_mr->ibmr.iova = 0;
2243 	pi_mr->pi_iova = pi_mr->data_length;
2244 	ibmr->length = pi_mr->ibmr.length;
2245 
2246 	return n;
2247 }
2248 
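/*
 * map_mr_sg_pi entry point for IB_MR_TYPE_INTEGRITY MRs. Tries the cheapest
 * representation first: direct PA mapping, then the internal MTT MR, and
 * finally the internal KLM MR; mr->pi_mr records which internal MR (if any)
 * ended up backing the mapping, and the metadata length is reported back
 * through ibmr->sig_attrs->meta_length. A ULP would normally reach this via
 * the core helper, roughly (sketch only, page size and variables assumed):
 *
 *	n = ib_map_mr_sg_pi(mr, data_sg, data_nents, NULL,
 *			    prot_sg, prot_nents, NULL, SZ_4K);
 *	(then post an IB_WR_REG_MR_INTEGRITY work request referencing mr)
 */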
2249 int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2250 			 int data_sg_nents, unsigned int *data_sg_offset,
2251 			 struct scatterlist *meta_sg, int meta_sg_nents,
2252 			 unsigned int *meta_sg_offset)
2253 {
2254 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2255 	struct mlx5_ib_mr *pi_mr = NULL;
2256 	int n;
2257 
2258 	WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
2259 
2260 	mr->ndescs = 0;
2261 	mr->data_length = 0;
2262 	mr->data_iova = 0;
2263 	mr->meta_ndescs = 0;
2264 	mr->pi_iova = 0;
2265 	/*
2266 	 * As a performance optimization, avoid the UMR operation for
2267 	 * registering the data/metadata buffers when possible.
2268 	 * First try to map the sg lists to PA descriptors with local_dma_lkey.
2269 	 * Fall back to UMR only in case of a failure.
2270 	 */
2271 	n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2272 				    data_sg_offset, meta_sg, meta_sg_nents,
2273 				    meta_sg_offset);
2274 	if (n == data_sg_nents + meta_sg_nents)
2275 		goto out;
2276 	/*
2277 	 * As a performance optimization, avoid KLM descriptors when possible.
2278 	 * First try to map the sg lists to MTT descriptors and fall back to
2279 	 * KLM only in case of a failure.
2280 	 * It's more efficient for the HW to work with MTT descriptors
2281 	 * (especially under high load).
2282 	 * Use KLM (indirect access) only when it's mandatory.
2283 	 */
2284 	pi_mr = mr->mtt_mr;
2285 	n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2286 				     data_sg_offset, meta_sg, meta_sg_nents,
2287 				     meta_sg_offset);
2288 	if (n == data_sg_nents + meta_sg_nents)
2289 		goto out;
2290 
2291 	pi_mr = mr->klm_mr;
2292 	n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2293 				     data_sg_offset, meta_sg, meta_sg_nents,
2294 				     meta_sg_offset);
2295 	if (unlikely(n != data_sg_nents + meta_sg_nents))
2296 		return -ENOMEM;
2297 
2298 out:
2299 	/* This is a zero-based memory region */
2300 	ibmr->iova = 0;
2301 	mr->pi_mr = pi_mr;
2302 	if (pi_mr)
2303 		ibmr->sig_attrs->meta_length = pi_mr->meta_length;
2304 	else
2305 		ibmr->sig_attrs->meta_length = mr->meta_length;
2306 
2307 	return 0;
2308 }
2309 
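/*
 * map_mr_sg entry point for regular fast-registration MRs: KLM-based MRs
 * (e.g. IB_MR_TYPE_SG_GAPS) go through mlx5_ib_sg_to_klms(), everything else
 * through the generic ib_sg_to_pages() page collector. A ULP would typically
 * drive this through the core helper, roughly (sketch, error handling
 * elided):
 *
 *	mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
 *	n = ib_map_mr_sg(mr, sg, nents, NULL, PAGE_SIZE);
 *	(then post an IB_WR_REG_MR work request referencing mr)
 */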
2310 int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
2311 		      unsigned int *sg_offset)
2312 {
2313 	struct mlx5_ib_mr *mr = to_mmr(ibmr);
2314 	int n;
2315 
2316 	mr->ndescs = 0;
2317 
2318 	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
2319 				   mr->desc_size * mr->max_descs,
2320 				   DMA_TO_DEVICE);
2321 
2322 	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
2323 		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
2324 				       NULL);
2325 	else
2326 		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
2327 				mlx5_set_page);
2328 
2329 	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
2330 				      mr->desc_size * mr->max_descs,
2331 				      DMA_TO_DEVICE);
2332 
2333 	return n;
2334 }
2335