1 /* 2 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved. 3 * 4 * This software is available to you under a choice of one of two 5 * licenses. You may choose to be licensed under the terms of the GNU 6 * General Public License (GPL) Version 2, available from the file 7 * COPYING in the main directory of this source tree, or the 8 * OpenIB.org BSD license below: 9 * 10 * Redistribution and use in source and binary forms, with or 11 * without modification, are permitted provided that the following 12 * conditions are met: 13 * 14 * - Redistributions of source code must retain the above 15 * copyright notice, this list of conditions and the following 16 * disclaimer. 17 * 18 * - Redistributions in binary form must reproduce the above 19 * copyright notice, this list of conditions and the following 20 * disclaimer in the documentation and/or other materials 21 * provided with the distribution. 22 * 23 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, 24 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF 25 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND 26 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS 27 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN 28 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 29 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 30 * SOFTWARE. 31 */ 32 33 34 #include <linux/kref.h> 35 #include <linux/random.h> 36 #include <linux/debugfs.h> 37 #include <linux/export.h> 38 #include <linux/delay.h> 39 #include <rdma/ib_umem.h> 40 #include <rdma/ib_umem_odp.h> 41 #include <rdma/ib_verbs.h> 42 #include "mlx5_ib.h" 43 44 enum { 45 MAX_PENDING_REG_MR = 8, 46 }; 47 48 #define MLX5_UMR_ALIGN 2048 49 50 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); 51 static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr); 52 static int mr_cache_max_order(struct mlx5_ib_dev *dev); 53 54 static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev) 55 { 56 return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled); 57 } 58 59 static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 60 { 61 WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key))); 62 63 return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); 64 } 65 66 static int order2idx(struct mlx5_ib_dev *dev, int order) 67 { 68 struct mlx5_mr_cache *cache = &dev->cache; 69 70 if (order < cache->ent[0].order) 71 return 0; 72 else 73 return order - cache->ent[0].order; 74 } 75 76 static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length) 77 { 78 return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >= 79 length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1)); 80 } 81 82 static void reg_mr_callback(int status, struct mlx5_async_work *context) 83 { 84 struct mlx5_ib_mr *mr = 85 container_of(context, struct mlx5_ib_mr, cb_work); 86 struct mlx5_ib_dev *dev = mr->dev; 87 struct mlx5_mr_cache *cache = &dev->cache; 88 int c = order2idx(dev, mr->order); 89 struct mlx5_cache_ent *ent = &cache->ent[c]; 90 u8 key; 91 unsigned long flags; 92 93 spin_lock_irqsave(&ent->lock, flags); 94 ent->pending--; 95 spin_unlock_irqrestore(&ent->lock, flags); 96 if (status) { 97 mlx5_ib_warn(dev, "async reg mr failed. 
status %d\n", status); 98 kfree(mr); 99 dev->fill_delay = 1; 100 mod_timer(&dev->delay_timer, jiffies + HZ); 101 return; 102 } 103 104 mr->mmkey.type = MLX5_MKEY_MR; 105 spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags); 106 key = dev->mdev->priv.mkey_key++; 107 spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags); 108 mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key; 109 110 cache->last_add = jiffies; 111 112 spin_lock_irqsave(&ent->lock, flags); 113 list_add_tail(&mr->list, &ent->head); 114 ent->cur++; 115 ent->size++; 116 spin_unlock_irqrestore(&ent->lock, flags); 117 118 if (!completion_done(&ent->compl)) 119 complete(&ent->compl); 120 } 121 122 static int add_keys(struct mlx5_ib_dev *dev, int c, int num) 123 { 124 struct mlx5_mr_cache *cache = &dev->cache; 125 struct mlx5_cache_ent *ent = &cache->ent[c]; 126 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 127 struct mlx5_ib_mr *mr; 128 void *mkc; 129 u32 *in; 130 int err = 0; 131 int i; 132 133 in = kzalloc(inlen, GFP_KERNEL); 134 if (!in) 135 return -ENOMEM; 136 137 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 138 for (i = 0; i < num; i++) { 139 if (ent->pending >= MAX_PENDING_REG_MR) { 140 err = -EAGAIN; 141 break; 142 } 143 144 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 145 if (!mr) { 146 err = -ENOMEM; 147 break; 148 } 149 mr->order = ent->order; 150 mr->allocated_from_cache = 1; 151 mr->dev = dev; 152 153 MLX5_SET(mkc, mkc, free, 1); 154 MLX5_SET(mkc, mkc, umr_en, 1); 155 MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3); 156 MLX5_SET(mkc, mkc, access_mode_4_2, 157 (ent->access_mode >> 2) & 0x7); 158 159 MLX5_SET(mkc, mkc, qpn, 0xffffff); 160 MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt); 161 MLX5_SET(mkc, mkc, log_page_size, ent->page); 162 163 spin_lock_irq(&ent->lock); 164 ent->pending++; 165 spin_unlock_irq(&ent->lock); 166 err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey, 167 &dev->async_ctx, in, inlen, 168 mr->out, sizeof(mr->out), 169 reg_mr_callback, &mr->cb_work); 170 if (err) { 171 spin_lock_irq(&ent->lock); 172 ent->pending--; 173 spin_unlock_irq(&ent->lock); 174 mlx5_ib_warn(dev, "create mkey failed %d\n", err); 175 kfree(mr); 176 break; 177 } 178 } 179 180 kfree(in); 181 return err; 182 } 183 184 static void remove_keys(struct mlx5_ib_dev *dev, int c, int num) 185 { 186 struct mlx5_mr_cache *cache = &dev->cache; 187 struct mlx5_cache_ent *ent = &cache->ent[c]; 188 struct mlx5_ib_mr *tmp_mr; 189 struct mlx5_ib_mr *mr; 190 LIST_HEAD(del_list); 191 int i; 192 193 for (i = 0; i < num; i++) { 194 spin_lock_irq(&ent->lock); 195 if (list_empty(&ent->head)) { 196 spin_unlock_irq(&ent->lock); 197 break; 198 } 199 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); 200 list_move(&mr->list, &del_list); 201 ent->cur--; 202 ent->size--; 203 spin_unlock_irq(&ent->lock); 204 mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); 205 } 206 207 list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { 208 list_del(&mr->list); 209 kfree(mr); 210 } 211 } 212 213 static ssize_t size_write(struct file *filp, const char __user *buf, 214 size_t count, loff_t *pos) 215 { 216 struct mlx5_cache_ent *ent = filp->private_data; 217 struct mlx5_ib_dev *dev = ent->dev; 218 char lbuf[20] = {0}; 219 u32 var; 220 int err; 221 int c; 222 223 count = min(count, sizeof(lbuf) - 1); 224 if (copy_from_user(lbuf, buf, count)) 225 return -EFAULT; 226 227 c = order2idx(dev, ent->order); 228 229 if (sscanf(lbuf, "%u", &var) != 1) 230 return -EINVAL; 231 232 if (var < ent->limit) 
233 return -EINVAL; 234 235 if (var > ent->size) { 236 do { 237 err = add_keys(dev, c, var - ent->size); 238 if (err && err != -EAGAIN) 239 return err; 240 241 usleep_range(3000, 5000); 242 } while (err); 243 } else if (var < ent->size) { 244 remove_keys(dev, c, ent->size - var); 245 } 246 247 return count; 248 } 249 250 static ssize_t size_read(struct file *filp, char __user *buf, size_t count, 251 loff_t *pos) 252 { 253 struct mlx5_cache_ent *ent = filp->private_data; 254 char lbuf[20]; 255 int err; 256 257 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); 258 if (err < 0) 259 return err; 260 261 return simple_read_from_buffer(buf, count, pos, lbuf, err); 262 } 263 264 static const struct file_operations size_fops = { 265 .owner = THIS_MODULE, 266 .open = simple_open, 267 .write = size_write, 268 .read = size_read, 269 }; 270 271 static ssize_t limit_write(struct file *filp, const char __user *buf, 272 size_t count, loff_t *pos) 273 { 274 struct mlx5_cache_ent *ent = filp->private_data; 275 struct mlx5_ib_dev *dev = ent->dev; 276 char lbuf[20] = {0}; 277 u32 var; 278 int err; 279 int c; 280 281 count = min(count, sizeof(lbuf) - 1); 282 if (copy_from_user(lbuf, buf, count)) 283 return -EFAULT; 284 285 c = order2idx(dev, ent->order); 286 287 if (sscanf(lbuf, "%u", &var) != 1) 288 return -EINVAL; 289 290 if (var > ent->size) 291 return -EINVAL; 292 293 ent->limit = var; 294 295 if (ent->cur < ent->limit) { 296 err = add_keys(dev, c, 2 * ent->limit - ent->cur); 297 if (err) 298 return err; 299 } 300 301 return count; 302 } 303 304 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, 305 loff_t *pos) 306 { 307 struct mlx5_cache_ent *ent = filp->private_data; 308 char lbuf[20]; 309 int err; 310 311 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); 312 if (err < 0) 313 return err; 314 315 return simple_read_from_buffer(buf, count, pos, lbuf, err); 316 } 317 318 static const struct file_operations limit_fops = { 319 .owner = THIS_MODULE, 320 .open = simple_open, 321 .write = limit_write, 322 .read = limit_read, 323 }; 324 325 static int someone_adding(struct mlx5_mr_cache *cache) 326 { 327 int i; 328 329 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 330 if (cache->ent[i].cur < cache->ent[i].limit) 331 return 1; 332 } 333 334 return 0; 335 } 336 337 static void __cache_work_func(struct mlx5_cache_ent *ent) 338 { 339 struct mlx5_ib_dev *dev = ent->dev; 340 struct mlx5_mr_cache *cache = &dev->cache; 341 int i = order2idx(dev, ent->order); 342 int err; 343 344 if (cache->stopped) 345 return; 346 347 ent = &dev->cache.ent[i]; 348 if (ent->cur < 2 * ent->limit && !dev->fill_delay) { 349 err = add_keys(dev, i, 1); 350 if (ent->cur < 2 * ent->limit) { 351 if (err == -EAGAIN) { 352 mlx5_ib_dbg(dev, "returned eagain, order %d\n", 353 i + 2); 354 queue_delayed_work(cache->wq, &ent->dwork, 355 msecs_to_jiffies(3)); 356 } else if (err) { 357 mlx5_ib_warn(dev, "command failed order %d, err %d\n", 358 i + 2, err); 359 queue_delayed_work(cache->wq, &ent->dwork, 360 msecs_to_jiffies(1000)); 361 } else { 362 queue_work(cache->wq, &ent->work); 363 } 364 } 365 } else if (ent->cur > 2 * ent->limit) { 366 /* 367 * The remove_keys() logic is performed as garbage collection 368 * task. Such task is intended to be run when no other active 369 * processes are running. 370 * 371 * The need_resched() will return TRUE if there are user tasks 372 * to be activated in near future. 
373 * 374 * In such case, we don't execute remove_keys() and postpone 375 * the garbage collection work to try to run in next cycle, 376 * in order to free CPU resources to other tasks. 377 */ 378 if (!need_resched() && !someone_adding(cache) && 379 time_after(jiffies, cache->last_add + 300 * HZ)) { 380 remove_keys(dev, i, 1); 381 if (ent->cur > ent->limit) 382 queue_work(cache->wq, &ent->work); 383 } else { 384 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); 385 } 386 } 387 } 388 389 static void delayed_cache_work_func(struct work_struct *work) 390 { 391 struct mlx5_cache_ent *ent; 392 393 ent = container_of(work, struct mlx5_cache_ent, dwork.work); 394 __cache_work_func(ent); 395 } 396 397 static void cache_work_func(struct work_struct *work) 398 { 399 struct mlx5_cache_ent *ent; 400 401 ent = container_of(work, struct mlx5_cache_ent, work); 402 __cache_work_func(ent); 403 } 404 405 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) 406 { 407 struct mlx5_mr_cache *cache = &dev->cache; 408 struct mlx5_cache_ent *ent; 409 struct mlx5_ib_mr *mr; 410 int err; 411 412 if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { 413 mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); 414 return ERR_PTR(-EINVAL); 415 } 416 417 ent = &cache->ent[entry]; 418 while (1) { 419 spin_lock_irq(&ent->lock); 420 if (list_empty(&ent->head)) { 421 spin_unlock_irq(&ent->lock); 422 423 err = add_keys(dev, entry, 1); 424 if (err && err != -EAGAIN) 425 return ERR_PTR(err); 426 427 wait_for_completion(&ent->compl); 428 } else { 429 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, 430 list); 431 list_del(&mr->list); 432 ent->cur--; 433 spin_unlock_irq(&ent->lock); 434 if (ent->cur < ent->limit) 435 queue_work(cache->wq, &ent->work); 436 return mr; 437 } 438 } 439 } 440 441 static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) 442 { 443 struct mlx5_mr_cache *cache = &dev->cache; 444 struct mlx5_ib_mr *mr = NULL; 445 struct mlx5_cache_ent *ent; 446 int last_umr_cache_entry; 447 int c; 448 int i; 449 450 c = order2idx(dev, order); 451 last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev)); 452 if (c < 0 || c > last_umr_cache_entry) { 453 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); 454 return NULL; 455 } 456 457 for (i = c; i <= last_umr_cache_entry; i++) { 458 ent = &cache->ent[i]; 459 460 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); 461 462 spin_lock_irq(&ent->lock); 463 if (!list_empty(&ent->head)) { 464 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, 465 list); 466 list_del(&mr->list); 467 ent->cur--; 468 spin_unlock_irq(&ent->lock); 469 if (ent->cur < ent->limit) 470 queue_work(cache->wq, &ent->work); 471 break; 472 } 473 spin_unlock_irq(&ent->lock); 474 475 queue_work(cache->wq, &ent->work); 476 } 477 478 if (!mr) 479 cache->ent[c].miss++; 480 481 return mr; 482 } 483 484 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 485 { 486 struct mlx5_mr_cache *cache = &dev->cache; 487 struct mlx5_cache_ent *ent; 488 int shrink = 0; 489 int c; 490 491 if (!mr->allocated_from_cache) 492 return; 493 494 c = order2idx(dev, mr->order); 495 WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); 496 497 if (mlx5_mr_cache_invalidate(mr)) { 498 mr->allocated_from_cache = false; 499 destroy_mkey(dev, mr); 500 ent = &cache->ent[c]; 501 if (ent->cur < ent->limit) 502 queue_work(cache->wq, &ent->work); 503 return; 504 } 505 506 ent = &cache->ent[c]; 507 spin_lock_irq(&ent->lock); 508 list_add_tail(&mr->list, 
&ent->head); 509 ent->cur++; 510 if (ent->cur > 2 * ent->limit) 511 shrink = 1; 512 spin_unlock_irq(&ent->lock); 513 514 if (shrink) 515 queue_work(cache->wq, &ent->work); 516 } 517 518 static void clean_keys(struct mlx5_ib_dev *dev, int c) 519 { 520 struct mlx5_mr_cache *cache = &dev->cache; 521 struct mlx5_cache_ent *ent = &cache->ent[c]; 522 struct mlx5_ib_mr *tmp_mr; 523 struct mlx5_ib_mr *mr; 524 LIST_HEAD(del_list); 525 526 cancel_delayed_work(&ent->dwork); 527 while (1) { 528 spin_lock_irq(&ent->lock); 529 if (list_empty(&ent->head)) { 530 spin_unlock_irq(&ent->lock); 531 break; 532 } 533 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); 534 list_move(&mr->list, &del_list); 535 ent->cur--; 536 ent->size--; 537 spin_unlock_irq(&ent->lock); 538 mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); 539 } 540 541 list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { 542 list_del(&mr->list); 543 kfree(mr); 544 } 545 } 546 547 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) 548 { 549 if (!mlx5_debugfs_root || dev->is_rep) 550 return; 551 552 debugfs_remove_recursive(dev->cache.root); 553 dev->cache.root = NULL; 554 } 555 556 static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) 557 { 558 struct mlx5_mr_cache *cache = &dev->cache; 559 struct mlx5_cache_ent *ent; 560 struct dentry *dir; 561 int i; 562 563 if (!mlx5_debugfs_root || dev->is_rep) 564 return; 565 566 cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); 567 568 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 569 ent = &cache->ent[i]; 570 sprintf(ent->name, "%d", ent->order); 571 dir = debugfs_create_dir(ent->name, cache->root); 572 debugfs_create_file("size", 0600, dir, ent, &size_fops); 573 debugfs_create_file("limit", 0600, dir, ent, &limit_fops); 574 debugfs_create_u32("cur", 0400, dir, &ent->cur); 575 debugfs_create_u32("miss", 0600, dir, &ent->miss); 576 } 577 } 578 579 static void delay_time_func(struct timer_list *t) 580 { 581 struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer); 582 583 dev->fill_delay = 0; 584 } 585 586 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) 587 { 588 struct mlx5_mr_cache *cache = &dev->cache; 589 struct mlx5_cache_ent *ent; 590 int i; 591 592 mutex_init(&dev->slow_path_mutex); 593 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); 594 if (!cache->wq) { 595 mlx5_ib_warn(dev, "failed to create work queue\n"); 596 return -ENOMEM; 597 } 598 599 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); 600 timer_setup(&dev->delay_timer, delay_time_func, 0); 601 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 602 ent = &cache->ent[i]; 603 INIT_LIST_HEAD(&ent->head); 604 spin_lock_init(&ent->lock); 605 ent->order = i + 2; 606 ent->dev = dev; 607 ent->limit = 0; 608 609 init_completion(&ent->compl); 610 INIT_WORK(&ent->work, cache_work_func); 611 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 612 613 if (i > MR_CACHE_LAST_STD_ENTRY) { 614 mlx5_odp_init_mr_cache_entry(ent); 615 continue; 616 } 617 618 if (ent->order > mr_cache_max_order(dev)) 619 continue; 620 621 ent->page = PAGE_SHIFT; 622 ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / 623 MLX5_IB_UMR_OCTOWORD; 624 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; 625 if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && 626 !dev->is_rep && 627 mlx5_core_is_pf(dev->mdev)) 628 ent->limit = dev->mdev->profile->mr_cache[i].limit; 629 else 630 ent->limit = 0; 631 queue_work(cache->wq, &ent->work); 632 } 633 634 mlx5_mr_cache_debugfs_init(dev); 635 636 return 0; 637 } 638 
639 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) 640 { 641 int i; 642 643 if (!dev->cache.wq) 644 return 0; 645 646 dev->cache.stopped = 1; 647 flush_workqueue(dev->cache.wq); 648 649 mlx5_mr_cache_debugfs_cleanup(dev); 650 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); 651 652 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) 653 clean_keys(dev, i); 654 655 destroy_workqueue(dev->cache.wq); 656 del_timer_sync(&dev->delay_timer); 657 658 return 0; 659 } 660 661 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, 662 struct ib_pd *pd) 663 { 664 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); 665 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); 666 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); 667 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); 668 MLX5_SET(mkc, mkc, lr, 1); 669 670 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 671 MLX5_SET(mkc, mkc, qpn, 0xffffff); 672 MLX5_SET64(mkc, mkc, start_addr, start_addr); 673 } 674 675 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) 676 { 677 struct mlx5_ib_dev *dev = to_mdev(pd->device); 678 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 679 struct mlx5_core_dev *mdev = dev->mdev; 680 struct mlx5_ib_mr *mr; 681 void *mkc; 682 u32 *in; 683 int err; 684 685 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 686 if (!mr) 687 return ERR_PTR(-ENOMEM); 688 689 in = kzalloc(inlen, GFP_KERNEL); 690 if (!in) { 691 err = -ENOMEM; 692 goto err_free; 693 } 694 695 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 696 697 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); 698 MLX5_SET(mkc, mkc, length64, 1); 699 set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); 700 701 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); 702 if (err) 703 goto err_in; 704 705 kfree(in); 706 mr->mmkey.type = MLX5_MKEY_MR; 707 mr->ibmr.lkey = mr->mmkey.key; 708 mr->ibmr.rkey = mr->mmkey.key; 709 mr->umem = NULL; 710 711 return &mr->ibmr; 712 713 err_in: 714 kfree(in); 715 716 err_free: 717 kfree(mr); 718 719 return ERR_PTR(err); 720 } 721 722 static int get_octo_len(u64 addr, u64 len, int page_shift) 723 { 724 u64 page_size = 1ULL << page_shift; 725 u64 offset; 726 int npages; 727 728 offset = addr & (page_size - 1); 729 npages = ALIGN(len + offset, page_size) >> page_shift; 730 return (npages + 1) / 2; 731 } 732 733 static int mr_cache_max_order(struct mlx5_ib_dev *dev) 734 { 735 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) 736 return MR_CACHE_LAST_STD_ENTRY + 2; 737 return MLX5_MAX_UMR_SHIFT; 738 } 739 740 static int mr_umem_get(struct mlx5_ib_dev *dev, struct ib_udata *udata, 741 u64 start, u64 length, int access_flags, 742 struct ib_umem **umem, int *npages, int *page_shift, 743 int *ncont, int *order) 744 { 745 struct ib_umem *u; 746 747 *umem = NULL; 748 749 if (access_flags & IB_ACCESS_ON_DEMAND) { 750 struct ib_umem_odp *odp; 751 752 odp = ib_umem_odp_get(udata, start, length, access_flags, 753 &mlx5_mn_ops); 754 if (IS_ERR(odp)) { 755 mlx5_ib_dbg(dev, "umem get failed (%ld)\n", 756 PTR_ERR(odp)); 757 return PTR_ERR(odp); 758 } 759 760 u = &odp->umem; 761 762 *page_shift = odp->page_shift; 763 *ncont = ib_umem_odp_num_pages(odp); 764 *npages = *ncont << (*page_shift - PAGE_SHIFT); 765 if (order) 766 *order = ilog2(roundup_pow_of_two(*ncont)); 767 } else { 768 u = ib_umem_get(udata, start, length, access_flags); 769 if (IS_ERR(u)) { 770 mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); 771 return PTR_ERR(u); 772 } 773 774 mlx5_ib_cont_pages(u, start, 
MLX5_MKEY_PAGE_SHIFT_MASK, npages, 775 page_shift, ncont, order); 776 } 777 778 if (!*npages) { 779 mlx5_ib_warn(dev, "avoid zero region\n"); 780 ib_umem_release(u); 781 return -EINVAL; 782 } 783 784 *umem = u; 785 786 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", 787 *npages, *ncont, *order, *page_shift); 788 789 return 0; 790 } 791 792 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) 793 { 794 struct mlx5_ib_umr_context *context = 795 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); 796 797 context->status = wc->status; 798 complete(&context->done); 799 } 800 801 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) 802 { 803 context->cqe.done = mlx5_ib_umr_done; 804 context->status = -1; 805 init_completion(&context->done); 806 } 807 808 static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, 809 struct mlx5_umr_wr *umrwr) 810 { 811 struct umr_common *umrc = &dev->umrc; 812 const struct ib_send_wr *bad; 813 int err; 814 struct mlx5_ib_umr_context umr_context; 815 816 mlx5_ib_init_umr_context(&umr_context); 817 umrwr->wr.wr_cqe = &umr_context.cqe; 818 819 down(&umrc->sem); 820 err = ib_post_send(umrc->qp, &umrwr->wr, &bad); 821 if (err) { 822 mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); 823 } else { 824 wait_for_completion(&umr_context.done); 825 if (umr_context.status != IB_WC_SUCCESS) { 826 mlx5_ib_warn(dev, "reg umr failed (%u)\n", 827 umr_context.status); 828 err = -EFAULT; 829 } 830 } 831 up(&umrc->sem); 832 return err; 833 } 834 835 static struct mlx5_ib_mr *alloc_mr_from_cache( 836 struct ib_pd *pd, struct ib_umem *umem, 837 u64 virt_addr, u64 len, int npages, 838 int page_shift, int order, int access_flags) 839 { 840 struct mlx5_ib_dev *dev = to_mdev(pd->device); 841 struct mlx5_ib_mr *mr; 842 int err = 0; 843 int i; 844 845 for (i = 0; i < 1; i++) { 846 mr = alloc_cached_mr(dev, order); 847 if (mr) 848 break; 849 850 err = add_keys(dev, order2idx(dev, order), 1); 851 if (err && err != -EAGAIN) { 852 mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); 853 break; 854 } 855 } 856 857 if (!mr) 858 return ERR_PTR(-EAGAIN); 859 860 mr->ibmr.pd = pd; 861 mr->umem = umem; 862 mr->access_flags = access_flags; 863 mr->desc_size = sizeof(struct mlx5_mtt); 864 mr->mmkey.iova = virt_addr; 865 mr->mmkey.size = len; 866 mr->mmkey.pd = to_mpd(pd)->pdn; 867 868 return mr; 869 } 870 871 static inline int populate_xlt(struct mlx5_ib_mr *mr, int idx, int npages, 872 void *xlt, int page_shift, size_t size, 873 int flags) 874 { 875 struct mlx5_ib_dev *dev = mr->dev; 876 struct ib_umem *umem = mr->umem; 877 878 if (flags & MLX5_IB_UPD_XLT_INDIRECT) { 879 if (!umr_can_use_indirect_mkey(dev)) 880 return -EPERM; 881 mlx5_odp_populate_klm(xlt, idx, npages, mr, flags); 882 return npages; 883 } 884 885 npages = min_t(size_t, npages, ib_umem_num_pages(umem) - idx); 886 887 if (!(flags & MLX5_IB_UPD_XLT_ZAP)) { 888 __mlx5_ib_populate_pas(dev, umem, page_shift, 889 idx, npages, xlt, 890 MLX5_IB_MTT_PRESENT); 891 /* Clear padding after the pages 892 * brought from the umem. 
893 */ 894 memset(xlt + (npages * sizeof(struct mlx5_mtt)), 0, 895 size - npages * sizeof(struct mlx5_mtt)); 896 } 897 898 return npages; 899 } 900 901 #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ 902 MLX5_UMR_MTT_ALIGNMENT) 903 #define MLX5_SPARE_UMR_CHUNK 0x10000 904 905 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, 906 int page_shift, int flags) 907 { 908 struct mlx5_ib_dev *dev = mr->dev; 909 struct device *ddev = dev->ib_dev.dev.parent; 910 int size; 911 void *xlt; 912 dma_addr_t dma; 913 struct mlx5_umr_wr wr; 914 struct ib_sge sg; 915 int err = 0; 916 int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) 917 ? sizeof(struct mlx5_klm) 918 : sizeof(struct mlx5_mtt); 919 const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size; 920 const int page_mask = page_align - 1; 921 size_t pages_mapped = 0; 922 size_t pages_to_map = 0; 923 size_t pages_iter = 0; 924 gfp_t gfp; 925 bool use_emergency_page = false; 926 927 if ((flags & MLX5_IB_UPD_XLT_INDIRECT) && 928 !umr_can_use_indirect_mkey(dev)) 929 return -EPERM; 930 931 /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, 932 * so we need to align the offset and length accordingly 933 */ 934 if (idx & page_mask) { 935 npages += idx & page_mask; 936 idx &= ~page_mask; 937 } 938 939 gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL; 940 gfp |= __GFP_ZERO | __GFP_NOWARN; 941 942 pages_to_map = ALIGN(npages, page_align); 943 size = desc_size * pages_to_map; 944 size = min_t(int, size, MLX5_MAX_UMR_CHUNK); 945 946 xlt = (void *)__get_free_pages(gfp, get_order(size)); 947 if (!xlt && size > MLX5_SPARE_UMR_CHUNK) { 948 mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation od %d bytes\n", 949 size, get_order(size), MLX5_SPARE_UMR_CHUNK); 950 951 size = MLX5_SPARE_UMR_CHUNK; 952 xlt = (void *)__get_free_pages(gfp, get_order(size)); 953 } 954 955 if (!xlt) { 956 mlx5_ib_warn(dev, "Using XLT emergency buffer\n"); 957 xlt = (void *)mlx5_ib_get_xlt_emergency_page(); 958 size = PAGE_SIZE; 959 memset(xlt, 0, size); 960 use_emergency_page = true; 961 } 962 pages_iter = size / desc_size; 963 dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE); 964 if (dma_mapping_error(ddev, dma)) { 965 mlx5_ib_err(dev, "unable to map DMA during XLT update.\n"); 966 err = -ENOMEM; 967 goto free_xlt; 968 } 969 970 sg.addr = dma; 971 sg.lkey = dev->umrc.pd->local_dma_lkey; 972 973 memset(&wr, 0, sizeof(wr)); 974 wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT; 975 if (!(flags & MLX5_IB_UPD_XLT_ENABLE)) 976 wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE; 977 wr.wr.sg_list = &sg; 978 wr.wr.num_sge = 1; 979 wr.wr.opcode = MLX5_IB_WR_UMR; 980 981 wr.pd = mr->ibmr.pd; 982 wr.mkey = mr->mmkey.key; 983 wr.length = mr->mmkey.size; 984 wr.virt_addr = mr->mmkey.iova; 985 wr.access_flags = mr->access_flags; 986 wr.page_shift = page_shift; 987 988 for (pages_mapped = 0; 989 pages_mapped < pages_to_map && !err; 990 pages_mapped += pages_iter, idx += pages_iter) { 991 npages = min_t(int, pages_iter, pages_to_map - pages_mapped); 992 dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE); 993 npages = populate_xlt(mr, idx, npages, xlt, 994 page_shift, size, flags); 995 996 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); 997 998 sg.length = ALIGN(npages * desc_size, 999 MLX5_UMR_MTT_ALIGNMENT); 1000 1001 if (pages_mapped + pages_iter >= pages_to_map) { 1002 if (flags & MLX5_IB_UPD_XLT_ENABLE) 1003 wr.wr.send_flags |= 1004 MLX5_IB_SEND_UMR_ENABLE_MR | 1005 
MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | 1006 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1007 if (flags & MLX5_IB_UPD_XLT_PD || 1008 flags & MLX5_IB_UPD_XLT_ACCESS) 1009 wr.wr.send_flags |= 1010 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1011 if (flags & MLX5_IB_UPD_XLT_ADDR) 1012 wr.wr.send_flags |= 1013 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1014 } 1015 1016 wr.offset = idx * desc_size; 1017 wr.xlt_size = sg.length; 1018 1019 err = mlx5_ib_post_send_wait(dev, &wr); 1020 } 1021 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); 1022 1023 free_xlt: 1024 if (use_emergency_page) 1025 mlx5_ib_put_xlt_emergency_page(); 1026 else 1027 free_pages((unsigned long)xlt, get_order(size)); 1028 1029 return err; 1030 } 1031 1032 /* 1033 * If ibmr is NULL it will be allocated by reg_create. 1034 * Else, the given ibmr will be used. 1035 */ 1036 static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, 1037 u64 virt_addr, u64 length, 1038 struct ib_umem *umem, int npages, 1039 int page_shift, int access_flags, 1040 bool populate) 1041 { 1042 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1043 struct mlx5_ib_mr *mr; 1044 __be64 *pas; 1045 void *mkc; 1046 int inlen; 1047 u32 *in; 1048 int err; 1049 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); 1050 1051 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); 1052 if (!mr) 1053 return ERR_PTR(-ENOMEM); 1054 1055 mr->ibmr.pd = pd; 1056 mr->access_flags = access_flags; 1057 1058 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1059 if (populate) 1060 inlen += sizeof(*pas) * roundup(npages, 2); 1061 in = kvzalloc(inlen, GFP_KERNEL); 1062 if (!in) { 1063 err = -ENOMEM; 1064 goto err_1; 1065 } 1066 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 1067 if (populate && !(access_flags & IB_ACCESS_ON_DEMAND)) 1068 mlx5_ib_populate_pas(dev, umem, page_shift, pas, 1069 pg_cap ? MLX5_IB_MTT_PRESENT : 0); 1070 1071 /* The pg_access bit allows setting the access flags 1072 * in the page list submitted with the command. 
*/ 1073 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); 1074 1075 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1076 MLX5_SET(mkc, mkc, free, !populate); 1077 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); 1078 MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); 1079 MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); 1080 MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); 1081 MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); 1082 MLX5_SET(mkc, mkc, lr, 1); 1083 MLX5_SET(mkc, mkc, umr_en, 1); 1084 1085 MLX5_SET64(mkc, mkc, start_addr, virt_addr); 1086 MLX5_SET64(mkc, mkc, len, length); 1087 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1088 MLX5_SET(mkc, mkc, bsf_octword_size, 0); 1089 MLX5_SET(mkc, mkc, translations_octword_size, 1090 get_octo_len(virt_addr, length, page_shift)); 1091 MLX5_SET(mkc, mkc, log_page_size, page_shift); 1092 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1093 if (populate) { 1094 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 1095 get_octo_len(virt_addr, length, page_shift)); 1096 } 1097 1098 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); 1099 if (err) { 1100 mlx5_ib_warn(dev, "create mkey failed\n"); 1101 goto err_2; 1102 } 1103 mr->mmkey.type = MLX5_MKEY_MR; 1104 mr->desc_size = sizeof(struct mlx5_mtt); 1105 mr->dev = dev; 1106 kvfree(in); 1107 1108 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); 1109 1110 return mr; 1111 1112 err_2: 1113 kvfree(in); 1114 1115 err_1: 1116 if (!ibmr) 1117 kfree(mr); 1118 1119 return ERR_PTR(err); 1120 } 1121 1122 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 1123 int npages, u64 length, int access_flags) 1124 { 1125 mr->npages = npages; 1126 atomic_add(npages, &dev->mdev->priv.reg_pages); 1127 mr->ibmr.lkey = mr->mmkey.key; 1128 mr->ibmr.rkey = mr->mmkey.key; 1129 mr->ibmr.length = length; 1130 mr->access_flags = access_flags; 1131 } 1132 1133 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, 1134 u64 length, int acc, int mode) 1135 { 1136 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1137 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1138 struct mlx5_core_dev *mdev = dev->mdev; 1139 struct mlx5_ib_mr *mr; 1140 void *mkc; 1141 u32 *in; 1142 int err; 1143 1144 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1145 if (!mr) 1146 return ERR_PTR(-ENOMEM); 1147 1148 in = kzalloc(inlen, GFP_KERNEL); 1149 if (!in) { 1150 err = -ENOMEM; 1151 goto err_free; 1152 } 1153 1154 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1155 1156 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); 1157 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); 1158 MLX5_SET64(mkc, mkc, len, length); 1159 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); 1160 1161 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); 1162 if (err) 1163 goto err_in; 1164 1165 kfree(in); 1166 1167 mr->umem = NULL; 1168 set_mr_fields(dev, mr, 0, length, acc); 1169 1170 return &mr->ibmr; 1171 1172 err_in: 1173 kfree(in); 1174 1175 err_free: 1176 kfree(mr); 1177 1178 return ERR_PTR(err); 1179 } 1180 1181 int mlx5_ib_advise_mr(struct ib_pd *pd, 1182 enum ib_uverbs_advise_mr_advice advice, 1183 u32 flags, 1184 struct ib_sge *sg_list, 1185 u32 num_sge, 1186 struct uverbs_attr_bundle *attrs) 1187 { 1188 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && 1189 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) 1190 return -EOPNOTSUPP; 1191 1192 return mlx5_ib_advise_mr_prefetch(pd, advice, flags, 1193 
sg_list, num_sge); 1194 } 1195 1196 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, 1197 struct ib_dm_mr_attr *attr, 1198 struct uverbs_attr_bundle *attrs) 1199 { 1200 struct mlx5_ib_dm *mdm = to_mdm(dm); 1201 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; 1202 u64 start_addr = mdm->dev_addr + attr->offset; 1203 int mode; 1204 1205 switch (mdm->type) { 1206 case MLX5_IB_UAPI_DM_TYPE_MEMIC: 1207 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) 1208 return ERR_PTR(-EINVAL); 1209 1210 mode = MLX5_MKC_ACCESS_MODE_MEMIC; 1211 start_addr -= pci_resource_start(dev->pdev, 0); 1212 break; 1213 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: 1214 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: 1215 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) 1216 return ERR_PTR(-EINVAL); 1217 1218 mode = MLX5_MKC_ACCESS_MODE_SW_ICM; 1219 break; 1220 default: 1221 return ERR_PTR(-EINVAL); 1222 } 1223 1224 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, 1225 attr->access_flags, mode); 1226 } 1227 1228 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1229 u64 virt_addr, int access_flags, 1230 struct ib_udata *udata) 1231 { 1232 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1233 struct mlx5_ib_mr *mr = NULL; 1234 bool use_umr; 1235 struct ib_umem *umem; 1236 int page_shift; 1237 int npages; 1238 int ncont; 1239 int order; 1240 int err; 1241 1242 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) 1243 return ERR_PTR(-EOPNOTSUPP); 1244 1245 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1246 start, virt_addr, length, access_flags); 1247 1248 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && 1249 length == U64_MAX) { 1250 if (!(access_flags & IB_ACCESS_ON_DEMAND) || 1251 !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) 1252 return ERR_PTR(-EINVAL); 1253 1254 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); 1255 if (IS_ERR(mr)) 1256 return ERR_CAST(mr); 1257 return &mr->ibmr; 1258 } 1259 1260 err = mr_umem_get(dev, udata, start, length, access_flags, &umem, 1261 &npages, &page_shift, &ncont, &order); 1262 1263 if (err < 0) 1264 return ERR_PTR(err); 1265 1266 use_umr = mlx5_ib_can_use_umr(dev, true); 1267 1268 if (order <= mr_cache_max_order(dev) && use_umr) { 1269 mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, 1270 page_shift, order, access_flags); 1271 if (PTR_ERR(mr) == -EAGAIN) { 1272 mlx5_ib_dbg(dev, "cache empty for order %d\n", order); 1273 mr = NULL; 1274 } 1275 } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { 1276 if (access_flags & IB_ACCESS_ON_DEMAND) { 1277 err = -EINVAL; 1278 pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); 1279 goto error; 1280 } 1281 use_umr = false; 1282 } 1283 1284 if (!mr) { 1285 mutex_lock(&dev->slow_path_mutex); 1286 mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, 1287 page_shift, access_flags, !use_umr); 1288 mutex_unlock(&dev->slow_path_mutex); 1289 } 1290 1291 if (IS_ERR(mr)) { 1292 err = PTR_ERR(mr); 1293 goto error; 1294 } 1295 1296 mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmkey.key); 1297 1298 mr->umem = umem; 1299 set_mr_fields(dev, mr, npages, length, access_flags); 1300 1301 if (use_umr) { 1302 int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; 1303 1304 if (access_flags & IB_ACCESS_ON_DEMAND) 1305 update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP; 1306 1307 err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, 1308 update_xlt_flags); 1309 1310 if (err) { 1311 
dereg_mr(dev, mr); 1312 return ERR_PTR(err); 1313 } 1314 } 1315 1316 if (is_odp_mr(mr)) { 1317 to_ib_umem_odp(mr->umem)->private = mr; 1318 atomic_set(&mr->num_deferred_work, 0); 1319 err = xa_err(xa_store(&dev->odp_mkeys, 1320 mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, 1321 GFP_KERNEL)); 1322 if (err) { 1323 dereg_mr(dev, mr); 1324 return ERR_PTR(err); 1325 } 1326 } 1327 1328 return &mr->ibmr; 1329 error: 1330 ib_umem_release(umem); 1331 return ERR_PTR(err); 1332 } 1333 1334 /** 1335 * mlx5_mr_cache_invalidate - Fence all DMA on the MR 1336 * @mr: The MR to fence 1337 * 1338 * Upon return the NIC will not be doing any DMA to the pages under the MR, 1339 * and any DMA inprogress will be completed. Failure of this function 1340 * indicates the HW has failed catastrophically. 1341 */ 1342 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) 1343 { 1344 struct mlx5_umr_wr umrwr = {}; 1345 1346 if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 1347 return 0; 1348 1349 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | 1350 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1351 umrwr.wr.opcode = MLX5_IB_WR_UMR; 1352 umrwr.pd = mr->dev->umrc.pd; 1353 umrwr.mkey = mr->mmkey.key; 1354 umrwr.ignore_free_state = 1; 1355 1356 return mlx5_ib_post_send_wait(mr->dev, &umrwr); 1357 } 1358 1359 static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1360 int access_flags, int flags) 1361 { 1362 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1363 struct mlx5_umr_wr umrwr = {}; 1364 int err; 1365 1366 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; 1367 1368 umrwr.wr.opcode = MLX5_IB_WR_UMR; 1369 umrwr.mkey = mr->mmkey.key; 1370 1371 if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { 1372 umrwr.pd = pd; 1373 umrwr.access_flags = access_flags; 1374 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1375 } 1376 1377 err = mlx5_ib_post_send_wait(dev, &umrwr); 1378 1379 return err; 1380 } 1381 1382 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1383 u64 length, u64 virt_addr, int new_access_flags, 1384 struct ib_pd *new_pd, struct ib_udata *udata) 1385 { 1386 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); 1387 struct mlx5_ib_mr *mr = to_mmr(ib_mr); 1388 struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; 1389 int access_flags = flags & IB_MR_REREG_ACCESS ? 1390 new_access_flags : 1391 mr->access_flags; 1392 int page_shift = 0; 1393 int upd_flags = 0; 1394 int npages = 0; 1395 int ncont = 0; 1396 int order = 0; 1397 u64 addr, len; 1398 int err; 1399 1400 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1401 start, virt_addr, length, access_flags); 1402 1403 atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); 1404 1405 if (!mr->umem) 1406 return -EINVAL; 1407 1408 if (is_odp_mr(mr)) 1409 return -EOPNOTSUPP; 1410 1411 if (flags & IB_MR_REREG_TRANS) { 1412 addr = virt_addr; 1413 len = length; 1414 } else { 1415 addr = mr->umem->address; 1416 len = mr->umem->length; 1417 } 1418 1419 if (flags != IB_MR_REREG_PD) { 1420 /* 1421 * Replace umem. This needs to be done whether or not UMR is 1422 * used. 
1423 */ 1424 flags |= IB_MR_REREG_TRANS; 1425 ib_umem_release(mr->umem); 1426 mr->umem = NULL; 1427 err = mr_umem_get(dev, udata, addr, len, access_flags, 1428 &mr->umem, &npages, &page_shift, &ncont, 1429 &order); 1430 if (err) 1431 goto err; 1432 } 1433 1434 if (!mlx5_ib_can_use_umr(dev, true) || 1435 (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { 1436 /* 1437 * UMR can't be used - MKey needs to be replaced. 1438 */ 1439 if (mr->allocated_from_cache) 1440 err = mlx5_mr_cache_invalidate(mr); 1441 else 1442 err = destroy_mkey(dev, mr); 1443 if (err) 1444 goto err; 1445 1446 mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, 1447 page_shift, access_flags, true); 1448 1449 if (IS_ERR(mr)) { 1450 err = PTR_ERR(mr); 1451 mr = to_mmr(ib_mr); 1452 goto err; 1453 } 1454 1455 mr->allocated_from_cache = 0; 1456 } else { 1457 /* 1458 * Send a UMR WQE 1459 */ 1460 mr->ibmr.pd = pd; 1461 mr->access_flags = access_flags; 1462 mr->mmkey.iova = addr; 1463 mr->mmkey.size = len; 1464 mr->mmkey.pd = to_mpd(pd)->pdn; 1465 1466 if (flags & IB_MR_REREG_TRANS) { 1467 upd_flags = MLX5_IB_UPD_XLT_ADDR; 1468 if (flags & IB_MR_REREG_PD) 1469 upd_flags |= MLX5_IB_UPD_XLT_PD; 1470 if (flags & IB_MR_REREG_ACCESS) 1471 upd_flags |= MLX5_IB_UPD_XLT_ACCESS; 1472 err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, 1473 upd_flags); 1474 } else { 1475 err = rereg_umr(pd, mr, access_flags, flags); 1476 } 1477 1478 if (err) 1479 goto err; 1480 } 1481 1482 set_mr_fields(dev, mr, npages, len, access_flags); 1483 1484 return 0; 1485 1486 err: 1487 ib_umem_release(mr->umem); 1488 mr->umem = NULL; 1489 1490 clean_mr(dev, mr); 1491 return err; 1492 } 1493 1494 static int 1495 mlx5_alloc_priv_descs(struct ib_device *device, 1496 struct mlx5_ib_mr *mr, 1497 int ndescs, 1498 int desc_size) 1499 { 1500 int size = ndescs * desc_size; 1501 int add_size; 1502 int ret; 1503 1504 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); 1505 1506 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); 1507 if (!mr->descs_alloc) 1508 return -ENOMEM; 1509 1510 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); 1511 1512 mr->desc_map = dma_map_single(device->dev.parent, mr->descs, 1513 size, DMA_TO_DEVICE); 1514 if (dma_mapping_error(device->dev.parent, mr->desc_map)) { 1515 ret = -ENOMEM; 1516 goto err; 1517 } 1518 1519 return 0; 1520 err: 1521 kfree(mr->descs_alloc); 1522 1523 return ret; 1524 } 1525 1526 static void 1527 mlx5_free_priv_descs(struct mlx5_ib_mr *mr) 1528 { 1529 if (mr->descs) { 1530 struct ib_device *device = mr->ibmr.device; 1531 int size = mr->max_descs * mr->desc_size; 1532 1533 dma_unmap_single(device->dev.parent, mr->desc_map, 1534 size, DMA_TO_DEVICE); 1535 kfree(mr->descs_alloc); 1536 mr->descs = NULL; 1537 } 1538 } 1539 1540 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1541 { 1542 int allocated_from_cache = mr->allocated_from_cache; 1543 1544 if (mr->sig) { 1545 if (mlx5_core_destroy_psv(dev->mdev, 1546 mr->sig->psv_memory.psv_idx)) 1547 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 1548 mr->sig->psv_memory.psv_idx); 1549 if (mlx5_core_destroy_psv(dev->mdev, 1550 mr->sig->psv_wire.psv_idx)) 1551 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 1552 mr->sig->psv_wire.psv_idx); 1553 xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key)); 1554 kfree(mr->sig); 1555 mr->sig = NULL; 1556 } 1557 1558 if (!allocated_from_cache) { 1559 destroy_mkey(dev, mr); 1560 mlx5_free_priv_descs(mr); 1561 } 1562 } 1563 1564 static void dereg_mr(struct mlx5_ib_dev 
*dev, struct mlx5_ib_mr *mr) 1565 { 1566 int npages = mr->npages; 1567 struct ib_umem *umem = mr->umem; 1568 1569 /* Stop all DMA */ 1570 if (is_odp_mr(mr)) 1571 mlx5_ib_fence_odp_mr(mr); 1572 else 1573 clean_mr(dev, mr); 1574 1575 if (mr->allocated_from_cache) 1576 mlx5_mr_cache_free(dev, mr); 1577 else 1578 kfree(mr); 1579 1580 ib_umem_release(umem); 1581 atomic_sub(npages, &dev->mdev->priv.reg_pages); 1582 1583 } 1584 1585 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 1586 { 1587 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 1588 1589 if (ibmr->type == IB_MR_TYPE_INTEGRITY) { 1590 dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr); 1591 dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); 1592 } 1593 1594 if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) { 1595 mlx5_ib_free_implicit_mr(mmr); 1596 return 0; 1597 } 1598 1599 dereg_mr(to_mdev(ibmr->device), mmr); 1600 1601 return 0; 1602 } 1603 1604 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, 1605 int access_mode, int page_shift) 1606 { 1607 void *mkc; 1608 1609 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1610 1611 MLX5_SET(mkc, mkc, free, 1); 1612 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1613 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1614 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 1615 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 1616 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 1617 MLX5_SET(mkc, mkc, umr_en, 1); 1618 MLX5_SET(mkc, mkc, log_page_size, page_shift); 1619 } 1620 1621 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1622 int ndescs, int desc_size, int page_shift, 1623 int access_mode, u32 *in, int inlen) 1624 { 1625 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1626 int err; 1627 1628 mr->access_mode = access_mode; 1629 mr->desc_size = desc_size; 1630 mr->max_descs = ndescs; 1631 1632 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); 1633 if (err) 1634 return err; 1635 1636 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); 1637 1638 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); 1639 if (err) 1640 goto err_free_descs; 1641 1642 mr->mmkey.type = MLX5_MKEY_MR; 1643 mr->ibmr.lkey = mr->mmkey.key; 1644 mr->ibmr.rkey = mr->mmkey.key; 1645 1646 return 0; 1647 1648 err_free_descs: 1649 mlx5_free_priv_descs(mr); 1650 return err; 1651 } 1652 1653 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, 1654 u32 max_num_sg, u32 max_num_meta_sg, 1655 int desc_size, int access_mode) 1656 { 1657 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1658 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); 1659 int page_shift = 0; 1660 struct mlx5_ib_mr *mr; 1661 u32 *in; 1662 int err; 1663 1664 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1665 if (!mr) 1666 return ERR_PTR(-ENOMEM); 1667 1668 mr->ibmr.pd = pd; 1669 mr->ibmr.device = pd->device; 1670 1671 in = kzalloc(inlen, GFP_KERNEL); 1672 if (!in) { 1673 err = -ENOMEM; 1674 goto err_free; 1675 } 1676 1677 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) 1678 page_shift = PAGE_SHIFT; 1679 1680 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, 1681 access_mode, in, inlen); 1682 if (err) 1683 goto err_free_in; 1684 1685 mr->umem = NULL; 1686 kfree(in); 1687 1688 return mr; 1689 1690 err_free_in: 1691 kfree(in); 1692 err_free: 1693 kfree(mr); 1694 return ERR_PTR(err); 1695 } 1696 1697 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1698 int ndescs, u32 *in, int 
inlen) 1699 { 1700 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), 1701 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, 1702 inlen); 1703 } 1704 1705 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1706 int ndescs, u32 *in, int inlen) 1707 { 1708 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), 1709 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 1710 } 1711 1712 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1713 int max_num_sg, int max_num_meta_sg, 1714 u32 *in, int inlen) 1715 { 1716 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1717 u32 psv_index[2]; 1718 void *mkc; 1719 int err; 1720 1721 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); 1722 if (!mr->sig) 1723 return -ENOMEM; 1724 1725 /* create mem & wire PSVs */ 1726 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); 1727 if (err) 1728 goto err_free_sig; 1729 1730 mr->sig->psv_memory.psv_idx = psv_index[0]; 1731 mr->sig->psv_wire.psv_idx = psv_index[1]; 1732 1733 mr->sig->sig_status_checked = true; 1734 mr->sig->sig_err_exists = false; 1735 /* Next UMR, Arm SIGERR */ 1736 ++mr->sig->sigerr_count; 1737 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 1738 sizeof(struct mlx5_klm), 1739 MLX5_MKC_ACCESS_MODE_KLMS); 1740 if (IS_ERR(mr->klm_mr)) { 1741 err = PTR_ERR(mr->klm_mr); 1742 goto err_destroy_psv; 1743 } 1744 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 1745 sizeof(struct mlx5_mtt), 1746 MLX5_MKC_ACCESS_MODE_MTT); 1747 if (IS_ERR(mr->mtt_mr)) { 1748 err = PTR_ERR(mr->mtt_mr); 1749 goto err_free_klm_mr; 1750 } 1751 1752 /* Set bsf descriptors for mkey */ 1753 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1754 MLX5_SET(mkc, mkc, bsf_en, 1); 1755 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); 1756 1757 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, 1758 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 1759 if (err) 1760 goto err_free_mtt_mr; 1761 1762 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), 1763 mr->sig, GFP_KERNEL)); 1764 if (err) 1765 goto err_free_descs; 1766 return 0; 1767 1768 err_free_descs: 1769 destroy_mkey(dev, mr); 1770 mlx5_free_priv_descs(mr); 1771 err_free_mtt_mr: 1772 dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); 1773 mr->mtt_mr = NULL; 1774 err_free_klm_mr: 1775 dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr); 1776 mr->klm_mr = NULL; 1777 err_destroy_psv: 1778 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) 1779 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 1780 mr->sig->psv_memory.psv_idx); 1781 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 1782 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 1783 mr->sig->psv_wire.psv_idx); 1784 err_free_sig: 1785 kfree(mr->sig); 1786 1787 return err; 1788 } 1789 1790 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, 1791 enum ib_mr_type mr_type, u32 max_num_sg, 1792 u32 max_num_meta_sg) 1793 { 1794 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1795 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1796 int ndescs = ALIGN(max_num_sg, 4); 1797 struct mlx5_ib_mr *mr; 1798 u32 *in; 1799 int err; 1800 1801 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1802 if (!mr) 1803 return ERR_PTR(-ENOMEM); 1804 1805 in = kzalloc(inlen, GFP_KERNEL); 1806 if (!in) { 1807 err = -ENOMEM; 1808 goto err_free; 1809 } 1810 1811 mr->ibmr.device = pd->device; 1812 mr->umem = NULL; 1813 1814 switch (mr_type) { 1815 case 
IB_MR_TYPE_MEM_REG: 1816 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); 1817 break; 1818 case IB_MR_TYPE_SG_GAPS: 1819 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); 1820 break; 1821 case IB_MR_TYPE_INTEGRITY: 1822 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, 1823 max_num_meta_sg, in, inlen); 1824 break; 1825 default: 1826 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); 1827 err = -EINVAL; 1828 } 1829 1830 if (err) 1831 goto err_free_in; 1832 1833 kfree(in); 1834 1835 return &mr->ibmr; 1836 1837 err_free_in: 1838 kfree(in); 1839 err_free: 1840 kfree(mr); 1841 return ERR_PTR(err); 1842 } 1843 1844 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1845 u32 max_num_sg, struct ib_udata *udata) 1846 { 1847 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); 1848 } 1849 1850 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, 1851 u32 max_num_sg, u32 max_num_meta_sg) 1852 { 1853 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, 1854 max_num_meta_sg); 1855 } 1856 1857 struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 1858 struct ib_udata *udata) 1859 { 1860 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1861 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1862 struct mlx5_ib_mw *mw = NULL; 1863 u32 *in = NULL; 1864 void *mkc; 1865 int ndescs; 1866 int err; 1867 struct mlx5_ib_alloc_mw req = {}; 1868 struct { 1869 __u32 comp_mask; 1870 __u32 response_length; 1871 } resp = {}; 1872 1873 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); 1874 if (err) 1875 return ERR_PTR(err); 1876 1877 if (req.comp_mask || req.reserved1 || req.reserved2) 1878 return ERR_PTR(-EOPNOTSUPP); 1879 1880 if (udata->inlen > sizeof(req) && 1881 !ib_is_udata_cleared(udata, sizeof(req), 1882 udata->inlen - sizeof(req))) 1883 return ERR_PTR(-EOPNOTSUPP); 1884 1885 ndescs = req.num_klms ? 
roundup(req.num_klms, 4) : roundup(1, 4); 1886 1887 mw = kzalloc(sizeof(*mw), GFP_KERNEL); 1888 in = kzalloc(inlen, GFP_KERNEL); 1889 if (!mw || !in) { 1890 err = -ENOMEM; 1891 goto free; 1892 } 1893 1894 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1895 1896 MLX5_SET(mkc, mkc, free, 1); 1897 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 1898 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1899 MLX5_SET(mkc, mkc, umr_en, 1); 1900 MLX5_SET(mkc, mkc, lr, 1); 1901 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); 1902 MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); 1903 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1904 1905 err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); 1906 if (err) 1907 goto free; 1908 1909 mw->mmkey.type = MLX5_MKEY_MW; 1910 mw->ibmw.rkey = mw->mmkey.key; 1911 mw->ndescs = ndescs; 1912 1913 resp.response_length = min(offsetof(typeof(resp), response_length) + 1914 sizeof(resp.response_length), udata->outlen); 1915 if (resp.response_length) { 1916 err = ib_copy_to_udata(udata, &resp, resp.response_length); 1917 if (err) { 1918 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); 1919 goto free; 1920 } 1921 } 1922 1923 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 1924 err = xa_err(xa_store(&dev->odp_mkeys, 1925 mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, 1926 GFP_KERNEL)); 1927 if (err) 1928 goto free_mkey; 1929 } 1930 1931 kfree(in); 1932 return &mw->ibmw; 1933 1934 free_mkey: 1935 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); 1936 free: 1937 kfree(mw); 1938 kfree(in); 1939 return ERR_PTR(err); 1940 } 1941 1942 int mlx5_ib_dealloc_mw(struct ib_mw *mw) 1943 { 1944 struct mlx5_ib_dev *dev = to_mdev(mw->device); 1945 struct mlx5_ib_mw *mmw = to_mmw(mw); 1946 int err; 1947 1948 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 1949 xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); 1950 /* 1951 * pagefault_single_data_segment() may be accessing mmw under 1952 * SRCU if the user bound an ODP MR to this MW. 
1953 */ 1954 synchronize_srcu(&dev->odp_srcu); 1955 } 1956 1957 err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); 1958 if (err) 1959 return err; 1960 kfree(mmw); 1961 return 0; 1962 } 1963 1964 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 1965 struct ib_mr_status *mr_status) 1966 { 1967 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 1968 int ret = 0; 1969 1970 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { 1971 pr_err("Invalid status check mask\n"); 1972 ret = -EINVAL; 1973 goto done; 1974 } 1975 1976 mr_status->fail_status = 0; 1977 if (check_mask & IB_MR_CHECK_SIG_STATUS) { 1978 if (!mmr->sig) { 1979 ret = -EINVAL; 1980 pr_err("signature status check requested on a non-signature enabled MR\n"); 1981 goto done; 1982 } 1983 1984 mmr->sig->sig_status_checked = true; 1985 if (!mmr->sig->sig_err_exists) 1986 goto done; 1987 1988 if (ibmr->lkey == mmr->sig->err_item.key) 1989 memcpy(&mr_status->sig_err, &mmr->sig->err_item, 1990 sizeof(mr_status->sig_err)); 1991 else { 1992 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; 1993 mr_status->sig_err.sig_err_offset = 0; 1994 mr_status->sig_err.key = mmr->sig->err_item.key; 1995 } 1996 1997 mmr->sig->sig_err_exists = false; 1998 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; 1999 } 2000 2001 done: 2002 return ret; 2003 } 2004 2005 static int 2006 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2007 int data_sg_nents, unsigned int *data_sg_offset, 2008 struct scatterlist *meta_sg, int meta_sg_nents, 2009 unsigned int *meta_sg_offset) 2010 { 2011 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2012 unsigned int sg_offset = 0; 2013 int n = 0; 2014 2015 mr->meta_length = 0; 2016 if (data_sg_nents == 1) { 2017 n++; 2018 mr->ndescs = 1; 2019 if (data_sg_offset) 2020 sg_offset = *data_sg_offset; 2021 mr->data_length = sg_dma_len(data_sg) - sg_offset; 2022 mr->data_iova = sg_dma_address(data_sg) + sg_offset; 2023 if (meta_sg_nents == 1) { 2024 n++; 2025 mr->meta_ndescs = 1; 2026 if (meta_sg_offset) 2027 sg_offset = *meta_sg_offset; 2028 else 2029 sg_offset = 0; 2030 mr->meta_length = sg_dma_len(meta_sg) - sg_offset; 2031 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; 2032 } 2033 ibmr->length = mr->data_length + mr->meta_length; 2034 } 2035 2036 return n; 2037 } 2038 2039 static int 2040 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 2041 struct scatterlist *sgl, 2042 unsigned short sg_nents, 2043 unsigned int *sg_offset_p, 2044 struct scatterlist *meta_sgl, 2045 unsigned short meta_sg_nents, 2046 unsigned int *meta_sg_offset_p) 2047 { 2048 struct scatterlist *sg = sgl; 2049 struct mlx5_klm *klms = mr->descs; 2050 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 2051 u32 lkey = mr->ibmr.pd->local_dma_lkey; 2052 int i, j = 0; 2053 2054 mr->ibmr.iova = sg_dma_address(sg) + sg_offset; 2055 mr->ibmr.length = 0; 2056 2057 for_each_sg(sgl, sg, sg_nents, i) { 2058 if (unlikely(i >= mr->max_descs)) 2059 break; 2060 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); 2061 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); 2062 klms[i].key = cpu_to_be32(lkey); 2063 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2064 2065 sg_offset = 0; 2066 } 2067 2068 if (sg_offset_p) 2069 *sg_offset_p = sg_offset; 2070 2071 mr->ndescs = i; 2072 mr->data_length = mr->ibmr.length; 2073 2074 if (meta_sg_nents) { 2075 sg = meta_sgl; 2076 sg_offset = meta_sg_offset_p ? 
2039	static int
2040	mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
2041			   struct scatterlist *sgl,
2042			   unsigned short sg_nents,
2043			   unsigned int *sg_offset_p,
2044			   struct scatterlist *meta_sgl,
2045			   unsigned short meta_sg_nents,
2046			   unsigned int *meta_sg_offset_p)
2047	{
2048		struct scatterlist *sg = sgl;
2049		struct mlx5_klm *klms = mr->descs;
2050		unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
2051		u32 lkey = mr->ibmr.pd->local_dma_lkey;
2052		int i, j = 0;
2053
2054		mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
2055		mr->ibmr.length = 0;
2056
2057		for_each_sg(sgl, sg, sg_nents, i) {
2058			if (unlikely(i >= mr->max_descs))
2059				break;
2060			klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
2061			klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
2062			klms[i].key = cpu_to_be32(lkey);
2063			mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2064
2065			sg_offset = 0;
2066		}
2067
2068		if (sg_offset_p)
2069			*sg_offset_p = sg_offset;
2070
2071		mr->ndescs = i;
2072		mr->data_length = mr->ibmr.length;
2073
2074		if (meta_sg_nents) {
2075			sg = meta_sgl;
2076			sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
2077			for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
2078				if (unlikely(i + j >= mr->max_descs))
2079					break;
2080				klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
2081							     sg_offset);
2082				klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
2083								 sg_offset);
2084				klms[i + j].key = cpu_to_be32(lkey);
2085				mr->ibmr.length += sg_dma_len(sg) - sg_offset;
2086
2087				sg_offset = 0;
2088			}
2089			if (meta_sg_offset_p)
2090				*meta_sg_offset_p = sg_offset;
2091
2092			mr->meta_ndescs = j;
2093			mr->meta_length = mr->ibmr.length - mr->data_length;
2094		}
2095
2096		return i + j;
2097	}
2098
2099	static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
2100	{
2101		struct mlx5_ib_mr *mr = to_mmr(ibmr);
2102		__be64 *descs;
2103
2104		if (unlikely(mr->ndescs == mr->max_descs))
2105			return -ENOMEM;
2106
2107		descs = mr->descs;
2108		descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2109
2110		return 0;
2111	}
2112
2113	static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
2114	{
2115		struct mlx5_ib_mr *mr = to_mmr(ibmr);
2116		__be64 *descs;
2117
2118		if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
2119			return -ENOMEM;
2120
2121		descs = mr->descs;
2122		descs[mr->ndescs + mr->meta_ndescs++] =
2123			cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);
2124
2125		return 0;
2126	}
2127
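/*
 * Editor's note: a worked example (all numbers hypothetical) of the
 * pi_iova calculation performed in mlx5_ib_map_mtt_mr_sg_pi() below,
 * assuming page_size = 0x1000:
 *
 *	data iova = 0x100100            ->  iova & page_mask       = 0x100000
 *	data spans 3 pages              ->  ndescs * page_size     = 0x003000
 *	metadata dma address = 0x200740 ->  meta iova & ~page_mask = 0x000740
 *	                                                             --------
 *	pi_iova                                                    = 0x103740
 *
 * With meta_length = 0x40, the registered length becomes
 * pi_iova + meta_length - iova = 0x103740 + 0x40 - 0x100100 = 0x3680,
 * i.e. everything from the start of the data to the end of the metadata,
 * including the page-alignment gap in between.
 */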
2128	static int
2129	mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2130				 int data_sg_nents, unsigned int *data_sg_offset,
2131				 struct scatterlist *meta_sg, int meta_sg_nents,
2132				 unsigned int *meta_sg_offset)
2133	{
2134		struct mlx5_ib_mr *mr = to_mmr(ibmr);
2135		struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
2136		int n;
2137
2138		pi_mr->ndescs = 0;
2139		pi_mr->meta_ndescs = 0;
2140		pi_mr->meta_length = 0;
2141
2142		ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2143					   pi_mr->desc_size * pi_mr->max_descs,
2144					   DMA_TO_DEVICE);
2145
2146		pi_mr->ibmr.page_size = ibmr->page_size;
2147		n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
2148				   mlx5_set_page);
2149		if (n != data_sg_nents)
2150			return n;
2151
2152		pi_mr->data_iova = pi_mr->ibmr.iova;
2153		pi_mr->data_length = pi_mr->ibmr.length;
2154		pi_mr->ibmr.length = pi_mr->data_length;
2155		ibmr->length = pi_mr->data_length;
2156
2157		if (meta_sg_nents) {
2158			u64 page_mask = ~((u64)ibmr->page_size - 1);
2159			u64 iova = pi_mr->data_iova;
2160
2161			n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
2162					    meta_sg_offset, mlx5_set_page_pi);
2163
2164			pi_mr->meta_length = pi_mr->ibmr.length;
2165			/*
2166			 * The PI address for the HW is the offset of the metadata
2167			 * address relative to the first data page address.
2168			 * It equals the first data page address + the size of the
2169			 * data pages + the metadata offset within the first metadata page.
2170			 */
2171			pi_mr->pi_iova = (iova & page_mask) +
2172					 pi_mr->ndescs * ibmr->page_size +
2173					 (pi_mr->ibmr.iova & ~page_mask);
2174			/*
2175			 * In order to use one MTT MR for both data and metadata, we
2176			 * also register the gap between the end of the data and the
2177			 * start of the metadata (the sig MR verifies that the HW
2178			 * accesses the right addresses). This mapping is safe because
2179			 * we use an internal mkey for the registration.
2180			 */
2181			pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
2182			pi_mr->ibmr.iova = iova;
2183			ibmr->length += pi_mr->meta_length;
2184		}
2185
2186		ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2187					      pi_mr->desc_size * pi_mr->max_descs,
2188					      DMA_TO_DEVICE);
2189
2190		return n;
2191	}
2192
2193	static int
2194	mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2195				 int data_sg_nents, unsigned int *data_sg_offset,
2196				 struct scatterlist *meta_sg, int meta_sg_nents,
2197				 unsigned int *meta_sg_offset)
2198	{
2199		struct mlx5_ib_mr *mr = to_mmr(ibmr);
2200		struct mlx5_ib_mr *pi_mr = mr->klm_mr;
2201		int n;
2202
2203		pi_mr->ndescs = 0;
2204		pi_mr->meta_ndescs = 0;
2205		pi_mr->meta_length = 0;
2206
2207		ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
2208					   pi_mr->desc_size * pi_mr->max_descs,
2209					   DMA_TO_DEVICE);
2210
2211		n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
2212				       meta_sg, meta_sg_nents, meta_sg_offset);
2213
2214		ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
2215					      pi_mr->desc_size * pi_mr->max_descs,
2216					      DMA_TO_DEVICE);
2217
2218		/* This is a zero-based memory region */
2219		pi_mr->data_iova = 0;
2220		pi_mr->ibmr.iova = 0;
2221		pi_mr->pi_iova = pi_mr->data_length;
2222		ibmr->length = pi_mr->ibmr.length;
2223
2224		return n;
2225	}
2226
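/*
 * Editor's note: mlx5_ib_map_mr_sg_pi() below is reached through the core
 * ib_map_mr_sg_pi() helper. A minimal ULP-side sketch (hypothetical
 * variables, error handling omitted, assuming the ib_verbs API of this
 * kernel generation):
 *
 *	struct ib_mr *mr;
 *	int n;
 *
 *	mr = ib_alloc_mr_integrity(pd, max_data_sg, max_meta_sg);
 *	n = ib_map_mr_sg_pi(mr, data_sg, data_nents, NULL,
 *			    meta_sg, meta_nents, NULL, PAGE_SIZE);
 *	// on success n == data_nents + meta_nents; the ULP then fills
 *	// *mr->sig_attrs and posts an IB_WR_REG_MR_INTEGRITY work request.
 */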
2227	int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
2228				 int data_sg_nents, unsigned int *data_sg_offset,
2229				 struct scatterlist *meta_sg, int meta_sg_nents,
2230				 unsigned int *meta_sg_offset)
2231	{
2232		struct mlx5_ib_mr *mr = to_mmr(ibmr);
2233		struct mlx5_ib_mr *pi_mr = NULL;
2234		int n;
2235
2236		WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);
2237
2238		mr->ndescs = 0;
2239		mr->data_length = 0;
2240		mr->data_iova = 0;
2241		mr->meta_ndescs = 0;
2242		mr->pi_iova = 0;
2243		/*
2244		 * As a performance optimization, avoid a UMR operation to register
2245		 * the data/metadata buffers when possible.
2246		 * First try to map the sg lists to PA descriptors with local_dma_lkey,
2247		 * and fall back to UMR only if that fails.
2248		 */
2249		n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2250					    data_sg_offset, meta_sg, meta_sg_nents,
2251					    meta_sg_offset);
2252		if (n == data_sg_nents + meta_sg_nents)
2253			goto out;
2254		/*
2255		 * As a performance optimization, avoid mapping the sg lists to KLM
2256		 * descriptors when possible. First try to map the sg lists to MTT
2257		 * descriptors and fall back to KLM only if that fails.
2258		 * The HW works more efficiently with MTT descriptors (especially
2259		 * under high load), so use KLM (indirect access) only when it is
2260		 * mandatory.
2261		 */
2262		pi_mr = mr->mtt_mr;
2263		n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2264					     data_sg_offset, meta_sg, meta_sg_nents,
2265					     meta_sg_offset);
2266		if (n == data_sg_nents + meta_sg_nents)
2267			goto out;
2268
2269		pi_mr = mr->klm_mr;
2270		n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
2271					     data_sg_offset, meta_sg, meta_sg_nents,
2272					     meta_sg_offset);
2273		if (unlikely(n != data_sg_nents + meta_sg_nents))
2274			return -ENOMEM;
2275
2276	out:
2277		/* This is a zero-based memory region */
2278		ibmr->iova = 0;
2279		mr->pi_mr = pi_mr;
2280		if (pi_mr)
2281			ibmr->sig_attrs->meta_length = pi_mr->meta_length;
2282		else
2283			ibmr->sig_attrs->meta_length = mr->meta_length;
2284
2285		return 0;
2286	}
2287
2288	int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
2289			      unsigned int *sg_offset)
2290	{
2291		struct mlx5_ib_mr *mr = to_mmr(ibmr);
2292		int n;
2293
2294		mr->ndescs = 0;
2295
2296		ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
2297					   mr->desc_size * mr->max_descs,
2298					   DMA_TO_DEVICE);
2299
2300		if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
2301			n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
2302					       NULL);
2303		else
2304			n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
2305					   mlx5_set_page);
2306
2307		ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
2308					      mr->desc_size * mr->max_descs,
2309					      DMA_TO_DEVICE);
2310
2311		return n;
2312	}
2313
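/*
 * Editor's note: mlx5_ib_map_mr_sg() above is the driver hook behind the
 * generic ib_map_mr_sg() fast-registration helper. A minimal ULP-side
 * sketch (hypothetical variables, error handling omitted):
 *
 *	struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, max_sge);
 *	int n = ib_map_mr_sg(mr, sg, sg_nents, NULL, PAGE_SIZE);
 *	// n is the number of SG entries mapped; if it equals sg_nents,
 *	// post an IB_WR_REG_MR work request (struct ib_reg_wr with
 *	// .mr = mr) and use mr->lkey / mr->rkey for subsequent RDMA.
 */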