/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048

static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);
static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);

static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);

	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
		/* Wait until all page fault handlers using the mr complete. */
		synchronize_srcu(&dev->mr_srcu);

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
{
	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}

static void reg_mr_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5_ib_mr *mr =
		container_of(context, struct mlx5_ib_mr, cb_work);
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct xarray *mkeys = &dev->mdev->priv.mkey_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	mr->mmkey.type = MLX5_MKEY_MR;
	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	xa_lock_irqsave(mkeys, flags);
	err = xa_err(__xa_store(mkeys, mlx5_base_mkey(mr->mmkey.key),
				&mr->mmkey, GFP_ATOMIC));
	xa_unlock_irqrestore(mkeys, flags);
	if (err)
		pr_err("Error inserting to mkey tree. 0x%x\n", -err);

	if (!completion_done(&ent->compl))
		complete(&ent->compl);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->allocated_from_cache = 1;
		mr->dev = dev;

		MLX5_SET(mkc, mkc, free, 1);
		MLX5_SET(mkc, mkc, umr_en, 1);
		MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
		MLX5_SET(mkc, mkc, access_mode_4_2,
			 (ent->access_mode >> 2) & 0x7);

		MLX5_SET(mkc, mkc, qpn, 0xffffff);
		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
		MLX5_SET(mkc, mkc, log_page_size, ent->page);

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
					       &dev->async_ctx, in, inlen,
					       mr->out, sizeof(mr->out),
					       reg_mr_callback, &mr->cb_work);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *tmp_mr;
	struct mlx5_ib_mr *mr;
	LIST_HEAD(del_list);
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			break;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_move(&mr->list, &del_list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
	}

	if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING))
		synchronize_srcu(&dev->mr_srcu);

	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
		list_del(&mr->list);
		kfree(mr);
	}
}

static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20] = {0};
	u32 var;
	int err;
	int c;

	count = min(count, sizeof(lbuf) - 1);
	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	return simple_read_from_buffer(buf, count, pos, lbuf, err);
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20] = {0};
	u32 var;
	int err;
	int c;

	count = min(count, sizeof(lbuf) - 1);
	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	return simple_read_from_buffer(buf, count, pos, lbuf, err);
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as a garbage collection
		 * task. Such a task is intended to run when no other active
		 * processes are running.
		 *
		 * need_resched() returns TRUE if there are user tasks to be
		 * activated in the near future.
		 *
		 * In that case we don't execute remove_keys() and postpone
		 * the garbage collection work, trying again in the next cycle
		 * in order to free CPU resources for other tasks.
		 */
		if (!need_resched() && !someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	struct mlx5_ib_mr *mr;
	int err;

	if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_err(dev, "cache entry %d is out of range\n", entry);
		return NULL;
	}

	ent = &cache->ent[entry];
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);

			err = add_keys(dev, entry, 1);
			if (err && err != -EAGAIN)
				return ERR_PTR(err);

			wait_for_completion(&ent->compl);
		} else {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			return mr;
		}
	}
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int last_umr_cache_entry;
	int c;
	int i;

	c = order2idx(dev, order);
	last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev));
	if (c < 0 || c > last_umr_cache_entry) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i <= last_umr_cache_entry; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	if (!mr->allocated_from_cache)
		return;

	c = order2idx(dev, mr->order);
	WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES);

	if (unreg_umr(dev, mr)) {
		mr->allocated_from_cache = false;
		destroy_mkey(dev, mr);
		ent = &cache->ent[c];
		if (ent->cur < ent->limit)
			queue_work(cache->wq, &ent->work);
		return;
	}

	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *tmp_mr;
	struct mlx5_ib_mr *mr;
	LIST_HEAD(del_list);

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			break;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_move(&mr->list, &del_list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
	}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	synchronize_srcu(&dev->mr_srcu);
#endif

	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
		list_del(&mr->list);
		kfree(mr);
	}
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root || dev->is_rep)
		return;

	debugfs_remove_recursive(dev->cache.root);
	dev->cache.root = NULL;
}

static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	struct dentry *dir;
	int i;

	if (!mlx5_debugfs_root || dev->is_rep)
		return;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		dir = debugfs_create_dir(ent->name, cache->root);
		debugfs_create_file("size", 0600, dir, ent, &size_fops);
		debugfs_create_file("limit", 0600, dir, ent, &limit_fops);
		debugfs_create_u32("cur", 0400, dir, &ent->cur);
		debugfs_create_u32("miss", 0600, dir, &ent->miss);
	}
}

static void delay_time_func(struct timer_list *t)
{
	struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer);

	dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	mutex_init(&dev->slow_path_mutex);
	cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM);
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx);
	timer_setup(&dev->delay_timer, delay_time_func, 0);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;
		ent->limit = 0;

		init_completion(&ent->compl);
		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);

		if (i > MR_CACHE_LAST_STD_ENTRY) {
			mlx5_odp_init_mr_cache_entry(ent);
			continue;
		}

		if (ent->order > mr_cache_max_order(dev))
			continue;

		ent->page = PAGE_SHIFT;
		ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) /
			   MLX5_IB_UMR_OCTOWORD;
		ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT;
		if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) &&
		    !dev->is_rep &&
		    mlx5_core_is_pf(dev->mdev))
			ent->limit = dev->mdev->profile->mr_cache[i].limit;
		else
			ent->limit = 0;
		queue_work(cache->wq, &ent->work);
	}

	mlx5_mr_cache_debugfs_init(dev);

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	if (!dev->cache.wq)
		return 0;

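	/*
	 * Mark the cache as stopped before draining it: __cache_work_func()
	 * checks cache->stopped and bails out, while the flush_workqueue()
	 * below waits for any work items that are already running.
	 */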
	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);
	mlx5_cmd_cleanup_async_ctx(&dev->async_ctx);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);

	MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA);
	MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC));
	MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE));
	MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ));
	MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE));
	MLX5_SET(mkc, mkc, lr, 1);

	MLX5_SET(mkc, mkc, length64, 1);
	MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn);
	MLX5_SET(mkc, mkc, qpn, 0xffffff);
	MLX5_SET64(mkc, mkc, start_addr, 0);

	err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen);
	if (err)
		goto err_in;

	kfree(in);
	mr->mmkey.type = MLX5_MKEY_MR;
	mr->ibmr.lkey = mr->mmkey.key;
	mr->ibmr.rkey = mr->mmkey.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_shift)
{
	u64 page_size = 1ULL << page_shift;
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> page_shift;
	return (npages + 1) / 2;
}

static int mr_cache_max_order(struct mlx5_ib_dev *dev)
{
	if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset))
		return MR_CACHE_LAST_STD_ENTRY + 2;
	return MLX5_MAX_UMR_SHIFT;
}

static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata,
		       u64 start, u64 length, int access_flags,
		       struct ib_umem **umem, int *npages, int *page_shift,
		       int *ncont, int *order)
{
	struct ib_umem *u;

	*umem = NULL;

	if (access_flags & IB_ACCESS_ON_DEMAND) {
		struct ib_umem_odp *odp;

		odp = ib_umem_odp_get(udata, start, length, access_flags);
		if (IS_ERR(odp)) {
			mlx5_ib_dbg(dev, "umem get failed (%ld)\n",
				    PTR_ERR(odp));
			return PTR_ERR(odp);
		}

		u = &odp->umem;

		*page_shift = odp->page_shift;
		*ncont = ib_umem_odp_num_pages(odp);
		*npages = *ncont << (*page_shift - PAGE_SHIFT);
		if (order)
			*order = ilog2(roundup_pow_of_two(*ncont));
	} else {
		u = ib_umem_get(udata, start, length, access_flags, 0);
		if (IS_ERR(u)) {
			mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u));
			return PTR_ERR(u);
		}

		mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages,
				   page_shift, ncont, order);
	}

	if (!*npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		ib_umem_release(u);
		return -EINVAL;
	}

	*umem = u;

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    *npages, *ncont, *order, *page_shift);

	return 0;
}

static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc)
{
	struct mlx5_ib_umr_context *context =
		container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe);

	context->status = wc->status;
	complete(&context->done);
}

static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context)
{
	context->cqe.done = mlx5_ib_umr_done;
	context->status = -1;
	init_completion(&context->done);
}

static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev,
				  struct mlx5_umr_wr *umrwr)
{
	struct umr_common *umrc = &dev->umrc;
	const struct ib_send_wr *bad;
	int err;
	struct mlx5_ib_umr_context umr_context;

	mlx5_ib_init_umr_context(&umr_context);
	umrwr->wr.wr_cqe = &umr_context.cqe;

	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr->wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err);
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed (%u)\n",
				     umr_context.status);
			err = -EFAULT;
		}
	}
	up(&umrc->sem);
	return err;
}

static struct mlx5_ib_mr *alloc_mr_from_cache(
				  struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mr->ibmr.pd = pd;
	mr->umem = umem;
	mr->access_flags = access_flags;
	mr->desc_size = sizeof(struct mlx5_mtt);
	mr->mmkey.iova = virt_addr;
	mr->mmkey.size = len;
	mr->mmkey.pd = to_mpd(pd)->pdn;

	return mr;
}

static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages,
			       void *xlt, int page_shift, size_t size,
			       int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct ib_umem *umem = mr->umem;

	if (flags & MLX5_IB_UPD_XLT_INDIRECT) {
		if (!umr_can_use_indirect_mkey(dev))
			return -EPERM;
		mlx5_odp_populate_klm(xlt, idx, npages, mr, flags);
		return npages;
	}

	npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx);

	if (!(flags & MLX5_IB_UPD_XLT_ZAP)) {
		__mlx5_ib_populate_pas(dev, umem, page_shift,
				       idx, npages, xlt,
				       MLX5_IB_MTT_PRESENT);
		/* Clear padding after the pages
		 * brought from the umem.
		 */
		memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0,
		       size - npages * sizeof(struct mlx5_mtt));
	}

	return npages;
}

#define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \
			    MLX5_UMR_MTT_ALIGNMENT)
#define MLX5_SPARE_UMR_CHUNK 0x10000

int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages,
		       int page_shift, int flags)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dev.parent;
	int size;
	void *xlt;
	dma_addr_t dma;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT)
			? sizeof(struct mlx5_klm)
			: sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	const int page_mask = page_align - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	gfp_t gfp;
	bool use_emergency_page = false;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}

	gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
	gfp |= __GFP_ZERO | __GFP_NOWARN;

	pages_to_map = ALIGN(npages, page_align);
	size = desc_size * pages_to_map;
	size = min_t(int, size, MLX5_MAX_UMR_CHUNK);

	xlt = (void *)__get_free_pages(gfp, get_order(size));
	if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
		mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation of %d bytes\n",
			    size, get_order(size), MLX5_SPARE_UMR_CHUNK);

		size = MLX5_SPARE_UMR_CHUNK;
		xlt = (void *)__get_free_pages(gfp, get_order(size));
	}

	if (!xlt) {
		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
		xlt = (void *)mlx5_ib_get_xlt_emergency_page();
		size = PAGE_SIZE;
		memset(xlt, 0, size);
		use_emergency_page = true;
	}
	pages_iter = size / desc_size;
	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		err = -ENOMEM;
		goto free_xlt;
	}

	sg.addr = dma;
	sg.lkey = dev->umrc.pd->local_dma_lkey;

	memset(&wr, 0, sizeof(wr));
	wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr.wr.sg_list = &sg;
	wr.wr.num_sge = 1;
	wr.wr.opcode = MLX5_IB_WR_UMR;

	wr.pd = mr->ibmr.pd;
	wr.mkey = mr->mmkey.key;
	wr.length = mr->mmkey.size;
	wr.virt_addr = mr->mmkey.iova;
	wr.access_flags = mr->access_flags;
	wr.page_shift = page_shift;

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
		npages = populate_xlt(mr, idx, npages, xlt,
				      page_shift, size, flags);

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		sg.length = ALIGN(npages * desc_size,
				  MLX5_UMR_MTT_ALIGNMENT);

		if (pages_mapped + pages_iter >= pages_to_map) {
			if (flags & MLX5_IB_UPD_XLT_ENABLE)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_ENABLE_MR |
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS |
					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
			if (flags & MLX5_IB_UPD_XLT_PD ||
			    flags & MLX5_IB_UPD_XLT_ACCESS)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS;
			if (flags & MLX5_IB_UPD_XLT_ADDR)
				wr.wr.send_flags |=
					MLX5_IB_SEND_UMR_UPDATE_TRANSLATION;
		}

		wr.offset = idx * desc_size;
		wr.xlt_size = sg.length;

		err = mlx5_ib_post_send_wait(dev, &wr);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_xlt:
	if (use_emergency_page)
		mlx5_ib_put_xlt_emergency_page();
	else
		free_pages((unsigned long)xlt, get_order(size));

1043 return err; 1044 } 1045 1046 /* 1047 * If ibmr is NULL it will be allocated by reg_create. 1048 * Else, the given ibmr will be used. 1049 */ 1050 static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, 1051 u64 virt_addr, u64 length, 1052 struct ib_umem *umem, int npages, 1053 int page_shift, int access_flags, 1054 bool populate) 1055 { 1056 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1057 struct mlx5_ib_mr *mr; 1058 __be64 *pas; 1059 void *mkc; 1060 int inlen; 1061 u32 *in; 1062 int err; 1063 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); 1064 1065 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); 1066 if (!mr) 1067 return ERR_PTR(-ENOMEM); 1068 1069 mr->ibmr.pd = pd; 1070 mr->access_flags = access_flags; 1071 1072 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1073 if (populate) 1074 inlen += sizeof(*pas) * roundup(npages, 2); 1075 in = kvzalloc(inlen, GFP_KERNEL); 1076 if (!in) { 1077 err = -ENOMEM; 1078 goto err_1; 1079 } 1080 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 1081 if (populate && !(access_flags & IB_ACCESS_ON_DEMAND)) 1082 mlx5_ib_populate_pas(dev, umem, page_shift, pas, 1083 pg_cap ? MLX5_IB_MTT_PRESENT : 0); 1084 1085 /* The pg_access bit allows setting the access flags 1086 * in the page list submitted with the command. */ 1087 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); 1088 1089 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1090 MLX5_SET(mkc, mkc, free, !populate); 1091 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); 1092 MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); 1093 MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); 1094 MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); 1095 MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); 1096 MLX5_SET(mkc, mkc, lr, 1); 1097 MLX5_SET(mkc, mkc, umr_en, 1); 1098 1099 MLX5_SET64(mkc, mkc, start_addr, virt_addr); 1100 MLX5_SET64(mkc, mkc, len, length); 1101 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1102 MLX5_SET(mkc, mkc, bsf_octword_size, 0); 1103 MLX5_SET(mkc, mkc, translations_octword_size, 1104 get_octo_len(virt_addr, length, page_shift)); 1105 MLX5_SET(mkc, mkc, log_page_size, page_shift); 1106 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1107 if (populate) { 1108 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 1109 get_octo_len(virt_addr, length, page_shift)); 1110 } 1111 1112 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); 1113 if (err) { 1114 mlx5_ib_warn(dev, "create mkey failed\n"); 1115 goto err_2; 1116 } 1117 mr->mmkey.type = MLX5_MKEY_MR; 1118 mr->desc_size = sizeof(struct mlx5_mtt); 1119 mr->dev = dev; 1120 kvfree(in); 1121 1122 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); 1123 1124 return mr; 1125 1126 err_2: 1127 kvfree(in); 1128 1129 err_1: 1130 if (!ibmr) 1131 kfree(mr); 1132 1133 return ERR_PTR(err); 1134 } 1135 1136 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 1137 int npages, u64 length, int access_flags) 1138 { 1139 mr->npages = npages; 1140 atomic_add(npages, &dev->mdev->priv.reg_pages); 1141 mr->ibmr.lkey = mr->mmkey.key; 1142 mr->ibmr.rkey = mr->mmkey.key; 1143 mr->ibmr.length = length; 1144 mr->access_flags = access_flags; 1145 } 1146 1147 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, 1148 u64 length, int acc, int mode) 1149 { 1150 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1151 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1152 struct mlx5_core_dev *mdev = 
dev->mdev; 1153 struct mlx5_ib_mr *mr; 1154 void *mkc; 1155 u32 *in; 1156 int err; 1157 1158 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1159 if (!mr) 1160 return ERR_PTR(-ENOMEM); 1161 1162 in = kzalloc(inlen, GFP_KERNEL); 1163 if (!in) { 1164 err = -ENOMEM; 1165 goto err_free; 1166 } 1167 1168 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1169 1170 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); 1171 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); 1172 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); 1173 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); 1174 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); 1175 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); 1176 MLX5_SET(mkc, mkc, lr, 1); 1177 1178 MLX5_SET64(mkc, mkc, len, length); 1179 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1180 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1181 MLX5_SET64(mkc, mkc, start_addr, start_addr); 1182 1183 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); 1184 if (err) 1185 goto err_in; 1186 1187 kfree(in); 1188 1189 mr->umem = NULL; 1190 set_mr_fields(dev, mr, 0, length, acc); 1191 1192 return &mr->ibmr; 1193 1194 err_in: 1195 kfree(in); 1196 1197 err_free: 1198 kfree(mr); 1199 1200 return ERR_PTR(err); 1201 } 1202 1203 int mlx5_ib_advise_mr(struct ib_pd *pd, 1204 enum ib_uverbs_advise_mr_advice advice, 1205 u32 flags, 1206 struct ib_sge *sg_list, 1207 u32 num_sge, 1208 struct uverbs_attr_bundle *attrs) 1209 { 1210 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && 1211 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) 1212 return -EOPNOTSUPP; 1213 1214 return mlx5_ib_advise_mr_prefetch(pd, advice, flags, 1215 sg_list, num_sge); 1216 } 1217 1218 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, 1219 struct ib_dm_mr_attr *attr, 1220 struct uverbs_attr_bundle *attrs) 1221 { 1222 struct mlx5_ib_dm *mdm = to_mdm(dm); 1223 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; 1224 u64 start_addr = mdm->dev_addr + attr->offset; 1225 int mode; 1226 1227 switch (mdm->type) { 1228 case MLX5_IB_UAPI_DM_TYPE_MEMIC: 1229 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) 1230 return ERR_PTR(-EINVAL); 1231 1232 mode = MLX5_MKC_ACCESS_MODE_MEMIC; 1233 start_addr -= pci_resource_start(dev->pdev, 0); 1234 break; 1235 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: 1236 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: 1237 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) 1238 return ERR_PTR(-EINVAL); 1239 1240 mode = MLX5_MKC_ACCESS_MODE_SW_ICM; 1241 break; 1242 default: 1243 return ERR_PTR(-EINVAL); 1244 } 1245 1246 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, 1247 attr->access_flags, mode); 1248 } 1249 1250 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1251 u64 virt_addr, int access_flags, 1252 struct ib_udata *udata) 1253 { 1254 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1255 struct mlx5_ib_mr *mr = NULL; 1256 bool use_umr; 1257 struct ib_umem *umem; 1258 int page_shift; 1259 int npages; 1260 int ncont; 1261 int order; 1262 int err; 1263 1264 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) 1265 return ERR_PTR(-EOPNOTSUPP); 1266 1267 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1268 start, virt_addr, length, access_flags); 1269 1270 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && 1271 length == U64_MAX) { 1272 if (!(access_flags & IB_ACCESS_ON_DEMAND) || 1273 !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) 1274 return 
ERR_PTR(-EINVAL); 1275 1276 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); 1277 if (IS_ERR(mr)) 1278 return ERR_CAST(mr); 1279 return &mr->ibmr; 1280 } 1281 1282 err = mr_umem_get(dev, udata, start, length, access_flags, &umem, 1283 &npages, &page_shift, &ncont, &order); 1284 1285 if (err < 0) 1286 return ERR_PTR(err); 1287 1288 use_umr = mlx5_ib_can_use_umr(dev, true); 1289 1290 if (order <= mr_cache_max_order(dev) && use_umr) { 1291 mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, 1292 page_shift, order, access_flags); 1293 if (PTR_ERR(mr) == -EAGAIN) { 1294 mlx5_ib_dbg(dev, "cache empty for order %d\n", order); 1295 mr = NULL; 1296 } 1297 } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { 1298 if (access_flags & IB_ACCESS_ON_DEMAND) { 1299 err = -EINVAL; 1300 pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); 1301 goto error; 1302 } 1303 use_umr = false; 1304 } 1305 1306 if (!mr) { 1307 mutex_lock(&dev->slow_path_mutex); 1308 mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, 1309 page_shift, access_flags, !use_umr); 1310 mutex_unlock(&dev->slow_path_mutex); 1311 } 1312 1313 if (IS_ERR(mr)) { 1314 err = PTR_ERR(mr); 1315 goto error; 1316 } 1317 1318 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1319 1320 mr->umem = umem; 1321 set_mr_fields(dev, mr, npages, length, access_flags); 1322 1323 if (use_umr) { 1324 int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; 1325 1326 if (access_flags & IB_ACCESS_ON_DEMAND) 1327 update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP; 1328 1329 err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, 1330 update_xlt_flags); 1331 1332 if (err) { 1333 dereg_mr(dev, mr); 1334 return ERR_PTR(err); 1335 } 1336 } 1337 1338 if (is_odp_mr(mr)) { 1339 to_ib_umem_odp(mr->umem)->private = mr; 1340 atomic_set(&mr->num_pending_prefetch, 0); 1341 } 1342 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) 1343 smp_store_release(&mr->live, 1); 1344 1345 return &mr->ibmr; 1346 error: 1347 ib_umem_release(umem); 1348 return ERR_PTR(err); 1349 } 1350 1351 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1352 { 1353 struct mlx5_core_dev *mdev = dev->mdev; 1354 struct mlx5_umr_wr umrwr = {}; 1355 1356 if (mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 1357 return 0; 1358 1359 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | 1360 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1361 umrwr.wr.opcode = MLX5_IB_WR_UMR; 1362 umrwr.pd = dev->umrc.pd; 1363 umrwr.mkey = mr->mmkey.key; 1364 umrwr.ignore_free_state = 1; 1365 1366 return mlx5_ib_post_send_wait(dev, &umrwr); 1367 } 1368 1369 static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1370 int access_flags, int flags) 1371 { 1372 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1373 struct mlx5_umr_wr umrwr = {}; 1374 int err; 1375 1376 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; 1377 1378 umrwr.wr.opcode = MLX5_IB_WR_UMR; 1379 umrwr.mkey = mr->mmkey.key; 1380 1381 if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { 1382 umrwr.pd = pd; 1383 umrwr.access_flags = access_flags; 1384 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1385 } 1386 1387 err = mlx5_ib_post_send_wait(dev, &umrwr); 1388 1389 return err; 1390 } 1391 1392 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1393 u64 length, u64 virt_addr, int new_access_flags, 1394 struct ib_pd *new_pd, struct ib_udata *udata) 1395 { 1396 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); 1397 struct mlx5_ib_mr *mr = to_mmr(ib_mr); 1398 struct ib_pd 
*pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; 1399 int access_flags = flags & IB_MR_REREG_ACCESS ? 1400 new_access_flags : 1401 mr->access_flags; 1402 int page_shift = 0; 1403 int upd_flags = 0; 1404 int npages = 0; 1405 int ncont = 0; 1406 int order = 0; 1407 u64 addr, len; 1408 int err; 1409 1410 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1411 start, virt_addr, length, access_flags); 1412 1413 atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); 1414 1415 if (!mr->umem) 1416 return -EINVAL; 1417 1418 if (is_odp_mr(mr)) 1419 return -EOPNOTSUPP; 1420 1421 if (flags & IB_MR_REREG_TRANS) { 1422 addr = virt_addr; 1423 len = length; 1424 } else { 1425 addr = mr->umem->address; 1426 len = mr->umem->length; 1427 } 1428 1429 if (flags != IB_MR_REREG_PD) { 1430 /* 1431 * Replace umem. This needs to be done whether or not UMR is 1432 * used. 1433 */ 1434 flags |= IB_MR_REREG_TRANS; 1435 ib_umem_release(mr->umem); 1436 mr->umem = NULL; 1437 err = mr_umem_get(dev, udata, addr, len, access_flags, 1438 &mr->umem, &npages, &page_shift, &ncont, 1439 &order); 1440 if (err) 1441 goto err; 1442 } 1443 1444 if (!mlx5_ib_can_use_umr(dev, true) || 1445 (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { 1446 /* 1447 * UMR can't be used - MKey needs to be replaced. 1448 */ 1449 if (mr->allocated_from_cache) 1450 err = unreg_umr(dev, mr); 1451 else 1452 err = destroy_mkey(dev, mr); 1453 if (err) 1454 goto err; 1455 1456 mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, 1457 page_shift, access_flags, true); 1458 1459 if (IS_ERR(mr)) { 1460 err = PTR_ERR(mr); 1461 mr = to_mmr(ib_mr); 1462 goto err; 1463 } 1464 1465 mr->allocated_from_cache = 0; 1466 } else { 1467 /* 1468 * Send a UMR WQE 1469 */ 1470 mr->ibmr.pd = pd; 1471 mr->access_flags = access_flags; 1472 mr->mmkey.iova = addr; 1473 mr->mmkey.size = len; 1474 mr->mmkey.pd = to_mpd(pd)->pdn; 1475 1476 if (flags & IB_MR_REREG_TRANS) { 1477 upd_flags = MLX5_IB_UPD_XLT_ADDR; 1478 if (flags & IB_MR_REREG_PD) 1479 upd_flags |= MLX5_IB_UPD_XLT_PD; 1480 if (flags & IB_MR_REREG_ACCESS) 1481 upd_flags |= MLX5_IB_UPD_XLT_ACCESS; 1482 err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, 1483 upd_flags); 1484 } else { 1485 err = rereg_umr(pd, mr, access_flags, flags); 1486 } 1487 1488 if (err) 1489 goto err; 1490 } 1491 1492 set_mr_fields(dev, mr, npages, len, access_flags); 1493 1494 return 0; 1495 1496 err: 1497 ib_umem_release(mr->umem); 1498 mr->umem = NULL; 1499 1500 clean_mr(dev, mr); 1501 return err; 1502 } 1503 1504 static int 1505 mlx5_alloc_priv_descs(struct ib_device *device, 1506 struct mlx5_ib_mr *mr, 1507 int ndescs, 1508 int desc_size) 1509 { 1510 int size = ndescs * desc_size; 1511 int add_size; 1512 int ret; 1513 1514 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); 1515 1516 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); 1517 if (!mr->descs_alloc) 1518 return -ENOMEM; 1519 1520 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); 1521 1522 mr->desc_map = dma_map_single(device->dev.parent, mr->descs, 1523 size, DMA_TO_DEVICE); 1524 if (dma_mapping_error(device->dev.parent, mr->desc_map)) { 1525 ret = -ENOMEM; 1526 goto err; 1527 } 1528 1529 return 0; 1530 err: 1531 kfree(mr->descs_alloc); 1532 1533 return ret; 1534 } 1535 1536 static void 1537 mlx5_free_priv_descs(struct mlx5_ib_mr *mr) 1538 { 1539 if (mr->descs) { 1540 struct ib_device *device = mr->ibmr.device; 1541 int size = mr->max_descs * mr->desc_size; 1542 1543 dma_unmap_single(device->dev.parent, 
mr->desc_map, 1544 size, DMA_TO_DEVICE); 1545 kfree(mr->descs_alloc); 1546 mr->descs = NULL; 1547 } 1548 } 1549 1550 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1551 { 1552 int allocated_from_cache = mr->allocated_from_cache; 1553 1554 if (mr->sig) { 1555 if (mlx5_core_destroy_psv(dev->mdev, 1556 mr->sig->psv_memory.psv_idx)) 1557 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 1558 mr->sig->psv_memory.psv_idx); 1559 if (mlx5_core_destroy_psv(dev->mdev, 1560 mr->sig->psv_wire.psv_idx)) 1561 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 1562 mr->sig->psv_wire.psv_idx); 1563 kfree(mr->sig); 1564 mr->sig = NULL; 1565 } 1566 1567 if (!allocated_from_cache) { 1568 destroy_mkey(dev, mr); 1569 mlx5_free_priv_descs(mr); 1570 } 1571 } 1572 1573 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1574 { 1575 int npages = mr->npages; 1576 struct ib_umem *umem = mr->umem; 1577 1578 if (is_odp_mr(mr)) { 1579 struct ib_umem_odp *umem_odp = to_ib_umem_odp(umem); 1580 1581 /* Prevent new page faults and 1582 * prefetch requests from succeeding 1583 */ 1584 WRITE_ONCE(mr->live, 0); 1585 1586 /* Wait for all running page-fault handlers to finish. */ 1587 synchronize_srcu(&dev->mr_srcu); 1588 1589 /* dequeue pending prefetch requests for the mr */ 1590 if (atomic_read(&mr->num_pending_prefetch)) 1591 flush_workqueue(system_unbound_wq); 1592 WARN_ON(atomic_read(&mr->num_pending_prefetch)); 1593 1594 /* Destroy all page mappings */ 1595 if (!umem_odp->is_implicit_odp) 1596 mlx5_ib_invalidate_range(umem_odp, 1597 ib_umem_start(umem_odp), 1598 ib_umem_end(umem_odp)); 1599 else 1600 mlx5_ib_free_implicit_mr(mr); 1601 /* 1602 * We kill the umem before the MR for ODP, 1603 * so that there will not be any invalidations in 1604 * flight, looking at the *mr struct. 1605 */ 1606 ib_umem_odp_release(umem_odp); 1607 atomic_sub(npages, &dev->mdev->priv.reg_pages); 1608 1609 /* Avoid double-freeing the umem. */ 1610 umem = NULL; 1611 } 1612 1613 clean_mr(dev, mr); 1614 1615 /* 1616 * We should unregister the DMA address from the HCA before 1617 * remove the DMA mapping. 
1618 */ 1619 mlx5_mr_cache_free(dev, mr); 1620 ib_umem_release(umem); 1621 if (umem) 1622 atomic_sub(npages, &dev->mdev->priv.reg_pages); 1623 1624 if (!mr->allocated_from_cache) 1625 kfree(mr); 1626 } 1627 1628 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 1629 { 1630 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 1631 1632 if (ibmr->type == IB_MR_TYPE_INTEGRITY) { 1633 dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr); 1634 dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); 1635 } 1636 1637 dereg_mr(to_mdev(ibmr->device), mmr); 1638 1639 return 0; 1640 } 1641 1642 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, 1643 int access_mode, int page_shift) 1644 { 1645 void *mkc; 1646 1647 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1648 1649 MLX5_SET(mkc, mkc, free, 1); 1650 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1651 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1652 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 1653 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 1654 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 1655 MLX5_SET(mkc, mkc, umr_en, 1); 1656 MLX5_SET(mkc, mkc, log_page_size, page_shift); 1657 } 1658 1659 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1660 int ndescs, int desc_size, int page_shift, 1661 int access_mode, u32 *in, int inlen) 1662 { 1663 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1664 int err; 1665 1666 mr->access_mode = access_mode; 1667 mr->desc_size = desc_size; 1668 mr->max_descs = ndescs; 1669 1670 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); 1671 if (err) 1672 return err; 1673 1674 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); 1675 1676 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); 1677 if (err) 1678 goto err_free_descs; 1679 1680 mr->mmkey.type = MLX5_MKEY_MR; 1681 mr->ibmr.lkey = mr->mmkey.key; 1682 mr->ibmr.rkey = mr->mmkey.key; 1683 1684 return 0; 1685 1686 err_free_descs: 1687 mlx5_free_priv_descs(mr); 1688 return err; 1689 } 1690 1691 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, 1692 u32 max_num_sg, u32 max_num_meta_sg, 1693 int desc_size, int access_mode) 1694 { 1695 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1696 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); 1697 int page_shift = 0; 1698 struct mlx5_ib_mr *mr; 1699 u32 *in; 1700 int err; 1701 1702 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1703 if (!mr) 1704 return ERR_PTR(-ENOMEM); 1705 1706 mr->ibmr.pd = pd; 1707 mr->ibmr.device = pd->device; 1708 1709 in = kzalloc(inlen, GFP_KERNEL); 1710 if (!in) { 1711 err = -ENOMEM; 1712 goto err_free; 1713 } 1714 1715 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) 1716 page_shift = PAGE_SHIFT; 1717 1718 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, 1719 access_mode, in, inlen); 1720 if (err) 1721 goto err_free_in; 1722 1723 mr->umem = NULL; 1724 kfree(in); 1725 1726 return mr; 1727 1728 err_free_in: 1729 kfree(in); 1730 err_free: 1731 kfree(mr); 1732 return ERR_PTR(err); 1733 } 1734 1735 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1736 int ndescs, u32 *in, int inlen) 1737 { 1738 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), 1739 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, 1740 inlen); 1741 } 1742 1743 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1744 int ndescs, u32 *in, int inlen) 1745 { 1746 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct 
mlx5_klm), 1747 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 1748 } 1749 1750 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1751 int max_num_sg, int max_num_meta_sg, 1752 u32 *in, int inlen) 1753 { 1754 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1755 u32 psv_index[2]; 1756 void *mkc; 1757 int err; 1758 1759 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); 1760 if (!mr->sig) 1761 return -ENOMEM; 1762 1763 /* create mem & wire PSVs */ 1764 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); 1765 if (err) 1766 goto err_free_sig; 1767 1768 mr->sig->psv_memory.psv_idx = psv_index[0]; 1769 mr->sig->psv_wire.psv_idx = psv_index[1]; 1770 1771 mr->sig->sig_status_checked = true; 1772 mr->sig->sig_err_exists = false; 1773 /* Next UMR, Arm SIGERR */ 1774 ++mr->sig->sigerr_count; 1775 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 1776 sizeof(struct mlx5_klm), 1777 MLX5_MKC_ACCESS_MODE_KLMS); 1778 if (IS_ERR(mr->klm_mr)) { 1779 err = PTR_ERR(mr->klm_mr); 1780 goto err_destroy_psv; 1781 } 1782 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 1783 sizeof(struct mlx5_mtt), 1784 MLX5_MKC_ACCESS_MODE_MTT); 1785 if (IS_ERR(mr->mtt_mr)) { 1786 err = PTR_ERR(mr->mtt_mr); 1787 goto err_free_klm_mr; 1788 } 1789 1790 /* Set bsf descriptors for mkey */ 1791 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1792 MLX5_SET(mkc, mkc, bsf_en, 1); 1793 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); 1794 1795 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, 1796 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 1797 if (err) 1798 goto err_free_mtt_mr; 1799 1800 return 0; 1801 1802 err_free_mtt_mr: 1803 dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); 1804 mr->mtt_mr = NULL; 1805 err_free_klm_mr: 1806 dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr); 1807 mr->klm_mr = NULL; 1808 err_destroy_psv: 1809 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) 1810 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 1811 mr->sig->psv_memory.psv_idx); 1812 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 1813 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 1814 mr->sig->psv_wire.psv_idx); 1815 err_free_sig: 1816 kfree(mr->sig); 1817 1818 return err; 1819 } 1820 1821 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, 1822 enum ib_mr_type mr_type, u32 max_num_sg, 1823 u32 max_num_meta_sg) 1824 { 1825 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1826 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1827 int ndescs = ALIGN(max_num_sg, 4); 1828 struct mlx5_ib_mr *mr; 1829 u32 *in; 1830 int err; 1831 1832 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1833 if (!mr) 1834 return ERR_PTR(-ENOMEM); 1835 1836 in = kzalloc(inlen, GFP_KERNEL); 1837 if (!in) { 1838 err = -ENOMEM; 1839 goto err_free; 1840 } 1841 1842 mr->ibmr.device = pd->device; 1843 mr->umem = NULL; 1844 1845 switch (mr_type) { 1846 case IB_MR_TYPE_MEM_REG: 1847 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); 1848 break; 1849 case IB_MR_TYPE_SG_GAPS: 1850 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); 1851 break; 1852 case IB_MR_TYPE_INTEGRITY: 1853 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, 1854 max_num_meta_sg, in, inlen); 1855 break; 1856 default: 1857 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); 1858 err = -EINVAL; 1859 } 1860 1861 if (err) 1862 goto err_free_in; 1863 1864 kfree(in); 1865 1866 return &mr->ibmr; 1867 1868 err_free_in: 1869 kfree(in); 1870 err_free: 
1871 kfree(mr); 1872 return ERR_PTR(err); 1873 } 1874 1875 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1876 u32 max_num_sg, struct ib_udata *udata) 1877 { 1878 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); 1879 } 1880 1881 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, 1882 u32 max_num_sg, u32 max_num_meta_sg) 1883 { 1884 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, 1885 max_num_meta_sg); 1886 } 1887 1888 struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 1889 struct ib_udata *udata) 1890 { 1891 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1892 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1893 struct mlx5_ib_mw *mw = NULL; 1894 u32 *in = NULL; 1895 void *mkc; 1896 int ndescs; 1897 int err; 1898 struct mlx5_ib_alloc_mw req = {}; 1899 struct { 1900 __u32 comp_mask; 1901 __u32 response_length; 1902 } resp = {}; 1903 1904 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); 1905 if (err) 1906 return ERR_PTR(err); 1907 1908 if (req.comp_mask || req.reserved1 || req.reserved2) 1909 return ERR_PTR(-EOPNOTSUPP); 1910 1911 if (udata->inlen > sizeof(req) && 1912 !ib_is_udata_cleared(udata, sizeof(req), 1913 udata->inlen - sizeof(req))) 1914 return ERR_PTR(-EOPNOTSUPP); 1915 1916 ndescs = req.num_klms ? roundup(req.num_klms, 4) : roundup(1, 4); 1917 1918 mw = kzalloc(sizeof(*mw), GFP_KERNEL); 1919 in = kzalloc(inlen, GFP_KERNEL); 1920 if (!mw || !in) { 1921 err = -ENOMEM; 1922 goto free; 1923 } 1924 1925 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1926 1927 MLX5_SET(mkc, mkc, free, 1); 1928 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 1929 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1930 MLX5_SET(mkc, mkc, umr_en, 1); 1931 MLX5_SET(mkc, mkc, lr, 1); 1932 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); 1933 MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); 1934 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1935 1936 err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); 1937 if (err) 1938 goto free; 1939 1940 mw->mmkey.type = MLX5_MKEY_MW; 1941 mw->ibmw.rkey = mw->mmkey.key; 1942 mw->ndescs = ndescs; 1943 1944 resp.response_length = min(offsetof(typeof(resp), response_length) + 1945 sizeof(resp.response_length), udata->outlen); 1946 if (resp.response_length) { 1947 err = ib_copy_to_udata(udata, &resp, resp.response_length); 1948 if (err) { 1949 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); 1950 goto free; 1951 } 1952 } 1953 1954 kfree(in); 1955 return &mw->ibmw; 1956 1957 free: 1958 kfree(mw); 1959 kfree(in); 1960 return ERR_PTR(err); 1961 } 1962 1963 int mlx5_ib_dealloc_mw(struct ib_mw *mw) 1964 { 1965 struct mlx5_ib_dev *dev = to_mdev(mw->device); 1966 struct mlx5_ib_mw *mmw = to_mmw(mw); 1967 int err; 1968 1969 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 1970 xa_erase_irq(&dev->mdev->priv.mkey_table, 1971 mlx5_base_mkey(mmw->mmkey.key)); 1972 /* 1973 * pagefault_single_data_segment() may be accessing mmw under 1974 * SRCU if the user bound an ODP MR to this MW. 
1975 */ 1976 synchronize_srcu(&dev->mr_srcu); 1977 } 1978 1979 err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); 1980 if (err) 1981 return err; 1982 kfree(mmw); 1983 return 0; 1984 } 1985 1986 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 1987 struct ib_mr_status *mr_status) 1988 { 1989 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 1990 int ret = 0; 1991 1992 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { 1993 pr_err("Invalid status check mask\n"); 1994 ret = -EINVAL; 1995 goto done; 1996 } 1997 1998 mr_status->fail_status = 0; 1999 if (check_mask & IB_MR_CHECK_SIG_STATUS) { 2000 if (!mmr->sig) { 2001 ret = -EINVAL; 2002 pr_err("signature status check requested on a non-signature enabled MR\n"); 2003 goto done; 2004 } 2005 2006 mmr->sig->sig_status_checked = true; 2007 if (!mmr->sig->sig_err_exists) 2008 goto done; 2009 2010 if (ibmr->lkey == mmr->sig->err_item.key) 2011 memcpy(&mr_status->sig_err, &mmr->sig->err_item, 2012 sizeof(mr_status->sig_err)); 2013 else { 2014 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; 2015 mr_status->sig_err.sig_err_offset = 0; 2016 mr_status->sig_err.key = mmr->sig->err_item.key; 2017 } 2018 2019 mmr->sig->sig_err_exists = false; 2020 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; 2021 } 2022 2023 done: 2024 return ret; 2025 } 2026 2027 static int 2028 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2029 int data_sg_nents, unsigned int *data_sg_offset, 2030 struct scatterlist *meta_sg, int meta_sg_nents, 2031 unsigned int *meta_sg_offset) 2032 { 2033 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2034 unsigned int sg_offset = 0; 2035 int n = 0; 2036 2037 mr->meta_length = 0; 2038 if (data_sg_nents == 1) { 2039 n++; 2040 mr->ndescs = 1; 2041 if (data_sg_offset) 2042 sg_offset = *data_sg_offset; 2043 mr->data_length = sg_dma_len(data_sg) - sg_offset; 2044 mr->data_iova = sg_dma_address(data_sg) + sg_offset; 2045 if (meta_sg_nents == 1) { 2046 n++; 2047 mr->meta_ndescs = 1; 2048 if (meta_sg_offset) 2049 sg_offset = *meta_sg_offset; 2050 else 2051 sg_offset = 0; 2052 mr->meta_length = sg_dma_len(meta_sg) - sg_offset; 2053 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; 2054 } 2055 ibmr->length = mr->data_length + mr->meta_length; 2056 } 2057 2058 return n; 2059 } 2060 2061 static int 2062 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 2063 struct scatterlist *sgl, 2064 unsigned short sg_nents, 2065 unsigned int *sg_offset_p, 2066 struct scatterlist *meta_sgl, 2067 unsigned short meta_sg_nents, 2068 unsigned int *meta_sg_offset_p) 2069 { 2070 struct scatterlist *sg = sgl; 2071 struct mlx5_klm *klms = mr->descs; 2072 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 2073 u32 lkey = mr->ibmr.pd->local_dma_lkey; 2074 int i, j = 0; 2075 2076 mr->ibmr.iova = sg_dma_address(sg) + sg_offset; 2077 mr->ibmr.length = 0; 2078 2079 for_each_sg(sgl, sg, sg_nents, i) { 2080 if (unlikely(i >= mr->max_descs)) 2081 break; 2082 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); 2083 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); 2084 klms[i].key = cpu_to_be32(lkey); 2085 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2086 2087 sg_offset = 0; 2088 } 2089 2090 if (sg_offset_p) 2091 *sg_offset_p = sg_offset; 2092 2093 mr->ndescs = i; 2094 mr->data_length = mr->ibmr.length; 2095 2096 if (meta_sg_nents) { 2097 sg = meta_sgl; 2098 sg_offset = meta_sg_offset_p ? 
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
		   struct scatterlist *sgl,
		   unsigned short sg_nents,
		   unsigned int *sg_offset_p,
		   struct scatterlist *meta_sgl,
		   unsigned short meta_sg_nents,
		   unsigned int *meta_sg_offset_p)
{
	struct scatterlist *sg = sgl;
	struct mlx5_klm *klms = mr->descs;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	u32 lkey = mr->ibmr.pd->local_dma_lkey;
	int i, j = 0;

	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
	mr->ibmr.length = 0;

	for_each_sg(sgl, sg, sg_nents, i) {
		if (unlikely(i >= mr->max_descs))
			break;
		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
		klms[i].key = cpu_to_be32(lkey);
		mr->ibmr.length += sg_dma_len(sg) - sg_offset;

		sg_offset = 0;
	}

	if (sg_offset_p)
		*sg_offset_p = sg_offset;

	mr->ndescs = i;
	mr->data_length = mr->ibmr.length;

	if (meta_sg_nents) {
		sg = meta_sgl;
		sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
		for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
			if (unlikely(i + j >= mr->max_descs))
				break;
			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
						     sg_offset);
			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
							 sg_offset);
			klms[i + j].key = cpu_to_be32(lkey);
			mr->ibmr.length += sg_dma_len(sg) - sg_offset;

			sg_offset = 0;
		}
		if (meta_sg_offset_p)
			*meta_sg_offset_p = sg_offset;

		mr->meta_ndescs = j;
		mr->meta_length = mr->ibmr.length - mr->data_length;
	}

	return i + j;
}

static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs + mr->meta_ndescs++] =
		cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}
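
/*
 * Added note on the pi_iova arithmetic in the function below (an
 * illustrative example, not functional code).  With a 4 KiB page size,
 * a data buffer whose DMA address is 0x10200 and which spans three MTT
 * pages, and metadata whose DMA address is 0x80100:
 *
 *	(iova & page_mask)              = 0x10000  first data page address
 *	ndescs * ibmr->page_size        = 0x03000  three data pages
 *	(pi_mr->ibmr.iova & ~page_mask) = 0x00100  metadata offset in its page
 *	pi_iova                         = 0x13100
 *
 * The addresses are hypothetical and chosen only to make the formula in
 * the code easy to follow.
 */
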
static int
mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
	int n;

	pi_mr->ndescs = 0;
	pi_mr->meta_ndescs = 0;
	pi_mr->meta_length = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
				   pi_mr->desc_size * pi_mr->max_descs,
				   DMA_TO_DEVICE);

	pi_mr->ibmr.page_size = ibmr->page_size;
	n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
			   mlx5_set_page);
	if (n != data_sg_nents)
		return n;

	pi_mr->data_iova = pi_mr->ibmr.iova;
	pi_mr->data_length = pi_mr->ibmr.length;
	pi_mr->ibmr.length = pi_mr->data_length;
	ibmr->length = pi_mr->data_length;

	if (meta_sg_nents) {
		u64 page_mask = ~((u64)ibmr->page_size - 1);
		u64 iova = pi_mr->data_iova;

		n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
				    meta_sg_offset, mlx5_set_page_pi);

		pi_mr->meta_length = pi_mr->ibmr.length;
		/*
		 * The PI address for the HW is the offset of the metadata
		 * address relative to the first data page address.
		 * It equals the first data page address plus the size of the
		 * data pages plus the metadata offset within the first
		 * metadata page.
		 */
		pi_mr->pi_iova = (iova & page_mask) +
				 pi_mr->ndescs * ibmr->page_size +
				 (pi_mr->ibmr.iova & ~page_mask);
		/*
		 * In order to use one MTT MR for both data and metadata, we
		 * also register the gap between the end of the data and the
		 * start of the metadata (the sig MR verifies that the HW
		 * accesses the right addresses).  This mapping is safe
		 * because we use an internal mkey for the registration.
		 */
		pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
		pi_mr->ibmr.iova = iova;
		ibmr->length += pi_mr->meta_length;
	}

	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
				      pi_mr->desc_size * pi_mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}

static int
mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = mr->klm_mr;
	int n;

	pi_mr->ndescs = 0;
	pi_mr->meta_ndescs = 0;
	pi_mr->meta_length = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
				   pi_mr->desc_size * pi_mr->max_descs,
				   DMA_TO_DEVICE);

	n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
			       meta_sg, meta_sg_nents, meta_sg_offset);

	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
				      pi_mr->desc_size * pi_mr->max_descs,
				      DMA_TO_DEVICE);

	/* This is a zero-based memory region */
	pi_mr->data_iova = 0;
	pi_mr->ibmr.iova = 0;
	pi_mr->pi_iova = pi_mr->data_length;
	ibmr->length = pi_mr->ibmr.length;

	return n;
}
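
/*
 * Added summary note (descriptive only): mlx5_ib_map_mr_sg_pi() below
 * tries three mapping strategies in order of decreasing efficiency and
 * stops at the first one that covers every data and metadata segment:
 *
 *	1. PA mapping with the local_dma_lkey (no UMR needed) - roughly,
 *	   only when data and metadata each fit in a single SG entry;
 *	2. MTT descriptors on the internal mtt_mr;
 *	3. KLM descriptors on the internal klm_mr (indirect access),
 *	   used only when the MTT mapping cannot describe the layout.
 */
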
int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = NULL;
	int n;

	WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);

	mr->ndescs = 0;
	mr->data_length = 0;
	mr->data_iova = 0;
	mr->meta_ndescs = 0;
	mr->pi_iova = 0;
	/*
	 * As a performance optimization, avoid the UMR operation for
	 * registering the data/metadata buffers whenever possible.
	 * First try to map the sg lists to PA descriptors with the
	 * local_dma_lkey, and fall back to UMR only if that fails.
	 */
	n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				    data_sg_offset, meta_sg, meta_sg_nents,
				    meta_sg_offset);
	if (n == data_sg_nents + meta_sg_nents)
		goto out;
	/*
	 * Likewise, avoid mapping the sg lists to KLM descriptors when
	 * possible: first try MTT descriptors and fall back to KLM only
	 * on failure.  The HW works more efficiently with MTT descriptors
	 * (especially under high load), so use KLM (indirect access) only
	 * when it is mandatory.
	 */
	pi_mr = mr->mtt_mr;
	n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				     data_sg_offset, meta_sg, meta_sg_nents,
				     meta_sg_offset);
	if (n == data_sg_nents + meta_sg_nents)
		goto out;

	pi_mr = mr->klm_mr;
	n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				     data_sg_offset, meta_sg, meta_sg_nents,
				     meta_sg_offset);
	if (unlikely(n != data_sg_nents + meta_sg_nents))
		return -ENOMEM;

out:
	/* This is a zero-based memory region */
	ibmr->iova = 0;
	mr->pi_mr = pi_mr;
	if (pi_mr)
		ibmr->sig_attrs->meta_length = pi_mr->meta_length;
	else
		ibmr->sig_attrs->meta_length = mr->meta_length;

	return 0;
}

int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
				       NULL);
	else
		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
				   mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}
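
/*
 * Added usage sketch (illustrative only, not part of the driver): a
 * kernel ULP does not call mlx5_ib_map_mr_sg() directly; it goes through
 * the core verbs helper and then posts a registration work request,
 * roughly along these lines (error handling elided, names taken from the
 * ib_verbs API; treat the exact field usage as an assumption):
 *
 *	n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
 *	if (n != nents)
 *		return -EINVAL;
 *	reg_wr.wr.opcode = IB_WR_REG_MR;
 *	reg_wr.mr = mr;
 *	reg_wr.key = mr->rkey;
 *	reg_wr.access = IB_ACCESS_LOCAL_WRITE | IB_ACCESS_REMOTE_READ;
 *	... post reg_wr on the QP before using mr->rkey ...
 */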