/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048

static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static void dereg_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr);
static int mr_cache_max_order(struct mlx5_ib_dev *dev);

static bool umr_can_use_indirect_mkey(struct mlx5_ib_dev *dev)
{
	return !MLX5_CAP_GEN(dev->mdev, umr_indirect_mkey_disabled);
}

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	WARN_ON(xa_load(&dev->odp_mkeys, mlx5_base_mkey(mr->mmkey.key)));

	return mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static bool use_umr_mtt_update(struct mlx5_ib_mr *mr, u64 start, u64 length)
{
	return ((u64)1 << mr->order) * MLX5_ADAPTER_PAGE_SIZE >=
		length + (start & (MLX5_ADAPTER_PAGE_SIZE - 1));
}

static void reg_mr_callback(int status, struct mlx5_async_work *context)
{
	struct mlx5_ib_mr *mr =
		container_of(context, struct mlx5_ib_mr, cb_work);
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	mr->mmkey.type = MLX5_MKEY_MR;
	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmkey.key = mlx5_idx_to_mkey(MLX5_GET(create_mkey_out, mr->out, mkey_index)) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	if (!completion_done(&ent->compl))
		complete(&ent->compl);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	int inlen = MLX5_ST_SZ_BYTES(create_mkey_in);
	struct mlx5_ib_mr *mr;
	void *mkc;
	u32 *in;
	int err = 0;
	int i;

	in = kzalloc(inlen, GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry);
	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->allocated_from_cache = true;
		mr->dev = dev;

		MLX5_SET(mkc, mkc, free, 1);
		MLX5_SET(mkc, mkc, umr_en, 1);
		MLX5_SET(mkc, mkc, access_mode_1_0, ent->access_mode & 0x3);
		MLX5_SET(mkc, mkc, access_mode_4_2,
			 (ent->access_mode >> 2) & 0x7);

		MLX5_SET(mkc, mkc, qpn, 0xffffff);
		MLX5_SET(mkc, mkc, translations_octword_size, ent->xlt);
		MLX5_SET(mkc, mkc, log_page_size, ent->page);

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey_cb(dev->mdev, &mr->mmkey,
					       &dev->async_ctx, in, inlen,
					       mr->out, sizeof(mr->out),
					       reg_mr_callback, &mr->cb_work);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *tmp_mr;
	struct mlx5_ib_mr *mr;
	LIST_HEAD(del_list);
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			break;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_move(&mr->list, &del_list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey);
	}

	list_for_each_entry_safe(mr, tmp_mr, &del_list, list) {
		list_del(&mr->list);
		kfree(mr);
	}
}

static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20] = {0};
	u32 var;
	int err;
	int c;

	count = min(count, sizeof(lbuf) - 1);
	if (copy_from_user(lbuf, buf, count))
		return -EFAULT;

	c = order2idx(dev, ent->order);

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
233 return -EINVAL; 234 235 if (var > ent->size) { 236 do { 237 err = add_keys(dev, c, var - ent->size); 238 if (err && err != -EAGAIN) 239 return err; 240 241 usleep_range(3000, 5000); 242 } while (err); 243 } else if (var < ent->size) { 244 remove_keys(dev, c, ent->size - var); 245 } 246 247 return count; 248 } 249 250 static ssize_t size_read(struct file *filp, char __user *buf, size_t count, 251 loff_t *pos) 252 { 253 struct mlx5_cache_ent *ent = filp->private_data; 254 char lbuf[20]; 255 int err; 256 257 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size); 258 if (err < 0) 259 return err; 260 261 return simple_read_from_buffer(buf, count, pos, lbuf, err); 262 } 263 264 static const struct file_operations size_fops = { 265 .owner = THIS_MODULE, 266 .open = simple_open, 267 .write = size_write, 268 .read = size_read, 269 }; 270 271 static ssize_t limit_write(struct file *filp, const char __user *buf, 272 size_t count, loff_t *pos) 273 { 274 struct mlx5_cache_ent *ent = filp->private_data; 275 struct mlx5_ib_dev *dev = ent->dev; 276 char lbuf[20] = {0}; 277 u32 var; 278 int err; 279 int c; 280 281 count = min(count, sizeof(lbuf) - 1); 282 if (copy_from_user(lbuf, buf, count)) 283 return -EFAULT; 284 285 c = order2idx(dev, ent->order); 286 287 if (sscanf(lbuf, "%u", &var) != 1) 288 return -EINVAL; 289 290 if (var > ent->size) 291 return -EINVAL; 292 293 ent->limit = var; 294 295 if (ent->cur < ent->limit) { 296 err = add_keys(dev, c, 2 * ent->limit - ent->cur); 297 if (err) 298 return err; 299 } 300 301 return count; 302 } 303 304 static ssize_t limit_read(struct file *filp, char __user *buf, size_t count, 305 loff_t *pos) 306 { 307 struct mlx5_cache_ent *ent = filp->private_data; 308 char lbuf[20]; 309 int err; 310 311 err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit); 312 if (err < 0) 313 return err; 314 315 return simple_read_from_buffer(buf, count, pos, lbuf, err); 316 } 317 318 static const struct file_operations limit_fops = { 319 .owner = THIS_MODULE, 320 .open = simple_open, 321 .write = limit_write, 322 .read = limit_read, 323 }; 324 325 static int someone_adding(struct mlx5_mr_cache *cache) 326 { 327 int i; 328 329 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 330 if (cache->ent[i].cur < cache->ent[i].limit) 331 return 1; 332 } 333 334 return 0; 335 } 336 337 static void __cache_work_func(struct mlx5_cache_ent *ent) 338 { 339 struct mlx5_ib_dev *dev = ent->dev; 340 struct mlx5_mr_cache *cache = &dev->cache; 341 int i = order2idx(dev, ent->order); 342 int err; 343 344 if (cache->stopped) 345 return; 346 347 ent = &dev->cache.ent[i]; 348 if (ent->cur < 2 * ent->limit && !dev->fill_delay) { 349 err = add_keys(dev, i, 1); 350 if (ent->cur < 2 * ent->limit) { 351 if (err == -EAGAIN) { 352 mlx5_ib_dbg(dev, "returned eagain, order %d\n", 353 i + 2); 354 queue_delayed_work(cache->wq, &ent->dwork, 355 msecs_to_jiffies(3)); 356 } else if (err) { 357 mlx5_ib_warn(dev, "command failed order %d, err %d\n", 358 i + 2, err); 359 queue_delayed_work(cache->wq, &ent->dwork, 360 msecs_to_jiffies(1000)); 361 } else { 362 queue_work(cache->wq, &ent->work); 363 } 364 } 365 } else if (ent->cur > 2 * ent->limit) { 366 /* 367 * The remove_keys() logic is performed as garbage collection 368 * task. Such task is intended to be run when no other active 369 * processes are running. 370 * 371 * The need_resched() will return TRUE if there are user tasks 372 * to be activated in near future. 
373 * 374 * In such case, we don't execute remove_keys() and postpone 375 * the garbage collection work to try to run in next cycle, 376 * in order to free CPU resources to other tasks. 377 */ 378 if (!need_resched() && !someone_adding(cache) && 379 time_after(jiffies, cache->last_add + 300 * HZ)) { 380 remove_keys(dev, i, 1); 381 if (ent->cur > ent->limit) 382 queue_work(cache->wq, &ent->work); 383 } else { 384 queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ); 385 } 386 } 387 } 388 389 static void delayed_cache_work_func(struct work_struct *work) 390 { 391 struct mlx5_cache_ent *ent; 392 393 ent = container_of(work, struct mlx5_cache_ent, dwork.work); 394 __cache_work_func(ent); 395 } 396 397 static void cache_work_func(struct work_struct *work) 398 { 399 struct mlx5_cache_ent *ent; 400 401 ent = container_of(work, struct mlx5_cache_ent, work); 402 __cache_work_func(ent); 403 } 404 405 struct mlx5_ib_mr *mlx5_mr_cache_alloc(struct mlx5_ib_dev *dev, int entry) 406 { 407 struct mlx5_mr_cache *cache = &dev->cache; 408 struct mlx5_cache_ent *ent; 409 struct mlx5_ib_mr *mr; 410 int err; 411 412 if (entry < 0 || entry >= MAX_MR_CACHE_ENTRIES) { 413 mlx5_ib_err(dev, "cache entry %d is out of range\n", entry); 414 return ERR_PTR(-EINVAL); 415 } 416 417 ent = &cache->ent[entry]; 418 while (1) { 419 spin_lock_irq(&ent->lock); 420 if (list_empty(&ent->head)) { 421 spin_unlock_irq(&ent->lock); 422 423 err = add_keys(dev, entry, 1); 424 if (err && err != -EAGAIN) 425 return ERR_PTR(err); 426 427 wait_for_completion(&ent->compl); 428 } else { 429 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, 430 list); 431 list_del(&mr->list); 432 ent->cur--; 433 spin_unlock_irq(&ent->lock); 434 if (ent->cur < ent->limit) 435 queue_work(cache->wq, &ent->work); 436 return mr; 437 } 438 } 439 } 440 441 static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order) 442 { 443 struct mlx5_mr_cache *cache = &dev->cache; 444 struct mlx5_ib_mr *mr = NULL; 445 struct mlx5_cache_ent *ent; 446 int last_umr_cache_entry; 447 int c; 448 int i; 449 450 c = order2idx(dev, order); 451 last_umr_cache_entry = order2idx(dev, mr_cache_max_order(dev)); 452 if (c < 0 || c > last_umr_cache_entry) { 453 mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c); 454 return NULL; 455 } 456 457 for (i = c; i <= last_umr_cache_entry; i++) { 458 ent = &cache->ent[i]; 459 460 mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i); 461 462 spin_lock_irq(&ent->lock); 463 if (!list_empty(&ent->head)) { 464 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, 465 list); 466 list_del(&mr->list); 467 ent->cur--; 468 spin_unlock_irq(&ent->lock); 469 if (ent->cur < ent->limit) 470 queue_work(cache->wq, &ent->work); 471 break; 472 } 473 spin_unlock_irq(&ent->lock); 474 475 queue_work(cache->wq, &ent->work); 476 } 477 478 if (!mr) 479 cache->ent[c].miss++; 480 481 return mr; 482 } 483 484 void mlx5_mr_cache_free(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 485 { 486 struct mlx5_mr_cache *cache = &dev->cache; 487 struct mlx5_cache_ent *ent; 488 int shrink = 0; 489 int c; 490 491 if (!mr->allocated_from_cache) 492 return; 493 494 c = order2idx(dev, mr->order); 495 WARN_ON(c < 0 || c >= MAX_MR_CACHE_ENTRIES); 496 497 if (mlx5_mr_cache_invalidate(mr)) { 498 mr->allocated_from_cache = false; 499 destroy_mkey(dev, mr); 500 ent = &cache->ent[c]; 501 if (ent->cur < ent->limit) 502 queue_work(cache->wq, &ent->work); 503 return; 504 } 505 506 ent = &cache->ent[c]; 507 spin_lock_irq(&ent->lock); 508 list_add_tail(&mr->list, 
&ent->head); 509 ent->cur++; 510 if (ent->cur > 2 * ent->limit) 511 shrink = 1; 512 spin_unlock_irq(&ent->lock); 513 514 if (shrink) 515 queue_work(cache->wq, &ent->work); 516 } 517 518 static void clean_keys(struct mlx5_ib_dev *dev, int c) 519 { 520 struct mlx5_mr_cache *cache = &dev->cache; 521 struct mlx5_cache_ent *ent = &cache->ent[c]; 522 struct mlx5_ib_mr *tmp_mr; 523 struct mlx5_ib_mr *mr; 524 LIST_HEAD(del_list); 525 526 cancel_delayed_work(&ent->dwork); 527 while (1) { 528 spin_lock_irq(&ent->lock); 529 if (list_empty(&ent->head)) { 530 spin_unlock_irq(&ent->lock); 531 break; 532 } 533 mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list); 534 list_move(&mr->list, &del_list); 535 ent->cur--; 536 ent->size--; 537 spin_unlock_irq(&ent->lock); 538 mlx5_core_destroy_mkey(dev->mdev, &mr->mmkey); 539 } 540 541 list_for_each_entry_safe(mr, tmp_mr, &del_list, list) { 542 list_del(&mr->list); 543 kfree(mr); 544 } 545 } 546 547 static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev) 548 { 549 if (!mlx5_debugfs_root || dev->is_rep) 550 return; 551 552 debugfs_remove_recursive(dev->cache.root); 553 dev->cache.root = NULL; 554 } 555 556 static void mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev) 557 { 558 struct mlx5_mr_cache *cache = &dev->cache; 559 struct mlx5_cache_ent *ent; 560 struct dentry *dir; 561 int i; 562 563 if (!mlx5_debugfs_root || dev->is_rep) 564 return; 565 566 cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root); 567 568 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 569 ent = &cache->ent[i]; 570 sprintf(ent->name, "%d", ent->order); 571 dir = debugfs_create_dir(ent->name, cache->root); 572 debugfs_create_file("size", 0600, dir, ent, &size_fops); 573 debugfs_create_file("limit", 0600, dir, ent, &limit_fops); 574 debugfs_create_u32("cur", 0400, dir, &ent->cur); 575 debugfs_create_u32("miss", 0600, dir, &ent->miss); 576 } 577 } 578 579 static void delay_time_func(struct timer_list *t) 580 { 581 struct mlx5_ib_dev *dev = from_timer(dev, t, delay_timer); 582 583 dev->fill_delay = 0; 584 } 585 586 int mlx5_mr_cache_init(struct mlx5_ib_dev *dev) 587 { 588 struct mlx5_mr_cache *cache = &dev->cache; 589 struct mlx5_cache_ent *ent; 590 int i; 591 592 mutex_init(&dev->slow_path_mutex); 593 cache->wq = alloc_ordered_workqueue("mkey_cache", WQ_MEM_RECLAIM); 594 if (!cache->wq) { 595 mlx5_ib_warn(dev, "failed to create work queue\n"); 596 return -ENOMEM; 597 } 598 599 mlx5_cmd_init_async_ctx(dev->mdev, &dev->async_ctx); 600 timer_setup(&dev->delay_timer, delay_time_func, 0); 601 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) { 602 ent = &cache->ent[i]; 603 INIT_LIST_HEAD(&ent->head); 604 spin_lock_init(&ent->lock); 605 ent->order = i + 2; 606 ent->dev = dev; 607 ent->limit = 0; 608 609 init_completion(&ent->compl); 610 INIT_WORK(&ent->work, cache_work_func); 611 INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func); 612 613 if (i > MR_CACHE_LAST_STD_ENTRY) { 614 mlx5_odp_init_mr_cache_entry(ent); 615 continue; 616 } 617 618 if (ent->order > mr_cache_max_order(dev)) 619 continue; 620 621 ent->page = PAGE_SHIFT; 622 ent->xlt = (1 << ent->order) * sizeof(struct mlx5_mtt) / 623 MLX5_IB_UMR_OCTOWORD; 624 ent->access_mode = MLX5_MKC_ACCESS_MODE_MTT; 625 if ((dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE) && 626 !dev->is_rep && 627 mlx5_core_is_pf(dev->mdev)) 628 ent->limit = dev->mdev->profile->mr_cache[i].limit; 629 else 630 ent->limit = 0; 631 queue_work(cache->wq, &ent->work); 632 } 633 634 mlx5_mr_cache_debugfs_init(dev); 635 636 return 0; 637 } 638 
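/*
 * mlx5_mr_cache_cleanup - tear down the MR cache created by
 * mlx5_mr_cache_init(): mark the cache as stopped, flush the cache
 * workqueue, remove the debugfs entries and the async command context,
 * destroy every cached mkey, then release the workqueue and the
 * fill-delay timer.
 */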
639 int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev) 640 { 641 int i; 642 643 if (!dev->cache.wq) 644 return 0; 645 646 dev->cache.stopped = 1; 647 flush_workqueue(dev->cache.wq); 648 649 mlx5_mr_cache_debugfs_cleanup(dev); 650 mlx5_cmd_cleanup_async_ctx(&dev->async_ctx); 651 652 for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) 653 clean_keys(dev, i); 654 655 destroy_workqueue(dev->cache.wq); 656 del_timer_sync(&dev->delay_timer); 657 658 return 0; 659 } 660 661 static void set_mkc_access_pd_addr_fields(void *mkc, int acc, u64 start_addr, 662 struct ib_pd *pd) 663 { 664 struct mlx5_ib_dev *dev = to_mdev(pd->device); 665 666 MLX5_SET(mkc, mkc, a, !!(acc & IB_ACCESS_REMOTE_ATOMIC)); 667 MLX5_SET(mkc, mkc, rw, !!(acc & IB_ACCESS_REMOTE_WRITE)); 668 MLX5_SET(mkc, mkc, rr, !!(acc & IB_ACCESS_REMOTE_READ)); 669 MLX5_SET(mkc, mkc, lw, !!(acc & IB_ACCESS_LOCAL_WRITE)); 670 MLX5_SET(mkc, mkc, lr, 1); 671 672 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) 673 MLX5_SET(mkc, mkc, relaxed_ordering_write, 674 !!(acc & IB_ACCESS_RELAXED_ORDERING)); 675 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) 676 MLX5_SET(mkc, mkc, relaxed_ordering_read, 677 !!(acc & IB_ACCESS_RELAXED_ORDERING)); 678 679 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 680 MLX5_SET(mkc, mkc, qpn, 0xffffff); 681 MLX5_SET64(mkc, mkc, start_addr, start_addr); 682 } 683 684 struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc) 685 { 686 struct mlx5_ib_dev *dev = to_mdev(pd->device); 687 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 688 struct mlx5_core_dev *mdev = dev->mdev; 689 struct mlx5_ib_mr *mr; 690 void *mkc; 691 u32 *in; 692 int err; 693 694 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 695 if (!mr) 696 return ERR_PTR(-ENOMEM); 697 698 in = kzalloc(inlen, GFP_KERNEL); 699 if (!in) { 700 err = -ENOMEM; 701 goto err_free; 702 } 703 704 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 705 706 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_PA); 707 MLX5_SET(mkc, mkc, length64, 1); 708 set_mkc_access_pd_addr_fields(mkc, acc, 0, pd); 709 710 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); 711 if (err) 712 goto err_in; 713 714 kfree(in); 715 mr->mmkey.type = MLX5_MKEY_MR; 716 mr->ibmr.lkey = mr->mmkey.key; 717 mr->ibmr.rkey = mr->mmkey.key; 718 mr->umem = NULL; 719 720 return &mr->ibmr; 721 722 err_in: 723 kfree(in); 724 725 err_free: 726 kfree(mr); 727 728 return ERR_PTR(err); 729 } 730 731 static int get_octo_len(u64 addr, u64 len, int page_shift) 732 { 733 u64 page_size = 1ULL << page_shift; 734 u64 offset; 735 int npages; 736 737 offset = addr & (page_size - 1); 738 npages = ALIGN(len + offset, page_size) >> page_shift; 739 return (npages + 1) / 2; 740 } 741 742 static int mr_cache_max_order(struct mlx5_ib_dev *dev) 743 { 744 if (MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) 745 return MR_CACHE_LAST_STD_ENTRY + 2; 746 return MLX5_MAX_UMR_SHIFT; 747 } 748 749 static int mr_umem_get(struct mlx5_ib_dev *dev, u64 start, u64 length, 750 int access_flags, struct ib_umem **umem, int *npages, 751 int *page_shift, int *ncont, int *order) 752 { 753 struct ib_umem *u; 754 755 *umem = NULL; 756 757 if (access_flags & IB_ACCESS_ON_DEMAND) { 758 struct ib_umem_odp *odp; 759 760 odp = ib_umem_odp_get(&dev->ib_dev, start, length, access_flags, 761 &mlx5_mn_ops); 762 if (IS_ERR(odp)) { 763 mlx5_ib_dbg(dev, "umem get failed (%ld)\n", 764 PTR_ERR(odp)); 765 return PTR_ERR(odp); 766 } 767 768 u = &odp->umem; 769 770 *page_shift = odp->page_shift; 771 *ncont = ib_umem_odp_num_pages(odp); 772 *npages = *ncont 
<< (*page_shift - PAGE_SHIFT); 773 if (order) 774 *order = ilog2(roundup_pow_of_two(*ncont)); 775 } else { 776 u = ib_umem_get(&dev->ib_dev, start, length, access_flags); 777 if (IS_ERR(u)) { 778 mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(u)); 779 return PTR_ERR(u); 780 } 781 782 mlx5_ib_cont_pages(u, start, MLX5_MKEY_PAGE_SHIFT_MASK, npages, 783 page_shift, ncont, order); 784 } 785 786 if (!*npages) { 787 mlx5_ib_warn(dev, "avoid zero region\n"); 788 ib_umem_release(u); 789 return -EINVAL; 790 } 791 792 *umem = u; 793 794 mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n", 795 *npages, *ncont, *order, *page_shift); 796 797 return 0; 798 } 799 800 static void mlx5_ib_umr_done(struct ib_cq *cq, struct ib_wc *wc) 801 { 802 struct mlx5_ib_umr_context *context = 803 container_of(wc->wr_cqe, struct mlx5_ib_umr_context, cqe); 804 805 context->status = wc->status; 806 complete(&context->done); 807 } 808 809 static inline void mlx5_ib_init_umr_context(struct mlx5_ib_umr_context *context) 810 { 811 context->cqe.done = mlx5_ib_umr_done; 812 context->status = -1; 813 init_completion(&context->done); 814 } 815 816 static int mlx5_ib_post_send_wait(struct mlx5_ib_dev *dev, 817 struct mlx5_umr_wr *umrwr) 818 { 819 struct umr_common *umrc = &dev->umrc; 820 const struct ib_send_wr *bad; 821 int err; 822 struct mlx5_ib_umr_context umr_context; 823 824 mlx5_ib_init_umr_context(&umr_context); 825 umrwr->wr.wr_cqe = &umr_context.cqe; 826 827 down(&umrc->sem); 828 err = ib_post_send(umrc->qp, &umrwr->wr, &bad); 829 if (err) { 830 mlx5_ib_warn(dev, "UMR post send failed, err %d\n", err); 831 } else { 832 wait_for_completion(&umr_context.done); 833 if (umr_context.status != IB_WC_SUCCESS) { 834 mlx5_ib_warn(dev, "reg umr failed (%u)\n", 835 umr_context.status); 836 err = -EFAULT; 837 } 838 } 839 up(&umrc->sem); 840 return err; 841 } 842 843 static struct mlx5_ib_mr *alloc_mr_from_cache( 844 struct ib_pd *pd, struct ib_umem *umem, 845 u64 virt_addr, u64 len, int npages, 846 int page_shift, int order, int access_flags) 847 { 848 struct mlx5_ib_dev *dev = to_mdev(pd->device); 849 struct mlx5_ib_mr *mr; 850 int err = 0; 851 int i; 852 853 for (i = 0; i < 1; i++) { 854 mr = alloc_cached_mr(dev, order); 855 if (mr) 856 break; 857 858 err = add_keys(dev, order2idx(dev, order), 1); 859 if (err && err != -EAGAIN) { 860 mlx5_ib_warn(dev, "add_keys failed, err %d\n", err); 861 break; 862 } 863 } 864 865 if (!mr) 866 return ERR_PTR(-EAGAIN); 867 868 mr->ibmr.pd = pd; 869 mr->umem = umem; 870 mr->access_flags = access_flags; 871 mr->desc_size = sizeof(struct mlx5_mtt); 872 mr->mmkey.iova = virt_addr; 873 mr->mmkey.size = len; 874 mr->mmkey.pd = to_mpd(pd)->pdn; 875 876 return mr; 877 } 878 879 #define MLX5_MAX_UMR_CHUNK ((1 << (MLX5_MAX_UMR_SHIFT + 4)) - \ 880 MLX5_UMR_MTT_ALIGNMENT) 881 #define MLX5_SPARE_UMR_CHUNK 0x10000 882 883 int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, 884 int page_shift, int flags) 885 { 886 struct mlx5_ib_dev *dev = mr->dev; 887 struct device *ddev = dev->ib_dev.dev.parent; 888 int size; 889 void *xlt; 890 dma_addr_t dma; 891 struct mlx5_umr_wr wr; 892 struct ib_sge sg; 893 int err = 0; 894 int desc_size = (flags & MLX5_IB_UPD_XLT_INDIRECT) 895 ? 
sizeof(struct mlx5_klm)
			       : sizeof(struct mlx5_mtt);
	const int page_align = MLX5_UMR_MTT_ALIGNMENT / desc_size;
	const int page_mask = page_align - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	size_t size_to_map = 0;
	gfp_t gfp;
	bool use_emergency_page = false;

	if ((flags & MLX5_IB_UPD_XLT_INDIRECT) &&
	    !umr_can_use_indirect_mkey(dev))
		return -EPERM;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly
	 */
	if (idx & page_mask) {
		npages += idx & page_mask;
		idx &= ~page_mask;
	}

	gfp = flags & MLX5_IB_UPD_XLT_ATOMIC ? GFP_ATOMIC : GFP_KERNEL;
	gfp |= __GFP_ZERO | __GFP_NOWARN;

	pages_to_map = ALIGN(npages, page_align);
	size = desc_size * pages_to_map;
	size = min_t(int, size, MLX5_MAX_UMR_CHUNK);

	xlt = (void *)__get_free_pages(gfp, get_order(size));
	if (!xlt && size > MLX5_SPARE_UMR_CHUNK) {
		mlx5_ib_dbg(dev, "Failed to allocate %d bytes of order %d. fallback to spare UMR allocation of %d bytes\n",
			    size, get_order(size), MLX5_SPARE_UMR_CHUNK);

		size = MLX5_SPARE_UMR_CHUNK;
		xlt = (void *)__get_free_pages(gfp, get_order(size));
	}

	if (!xlt) {
		mlx5_ib_warn(dev, "Using XLT emergency buffer\n");
		xlt = (void *)mlx5_ib_get_xlt_emergency_page();
		size = PAGE_SIZE;
		memset(xlt, 0, size);
		use_emergency_page = true;
	}
	pages_iter = size / desc_size;
	dma = dma_map_single(ddev, xlt, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during XLT update.\n");
		err = -ENOMEM;
		goto free_xlt;
	}

	if (mr->umem->is_odp) {
		if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) {
			struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem);
			size_t max_pages = ib_umem_odp_num_pages(odp) - idx;

			pages_to_map = min_t(size_t, pages_to_map, max_pages);
		}
	}

	sg.addr = dma;
	sg.lkey = dev->umrc.pd->local_dma_lkey;

	memset(&wr, 0, sizeof(wr));
	wr.wr.send_flags = MLX5_IB_SEND_UMR_UPDATE_XLT;
	if (!(flags & MLX5_IB_UPD_XLT_ENABLE))
		wr.wr.send_flags |= MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr.wr.sg_list = &sg;
	wr.wr.num_sge = 1;
	wr.wr.opcode = MLX5_IB_WR_UMR;

	wr.pd = mr->ibmr.pd;
	wr.mkey = mr->mmkey.key;
	wr.length = mr->mmkey.size;
	wr.virt_addr = mr->mmkey.iova;
	wr.access_flags = mr->access_flags;
	wr.page_shift = page_shift;

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, idx += pages_iter) {
		npages = min_t(int, pages_iter, pages_to_map - pages_mapped);
		size_to_map = npages * desc_size;
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);
		if (mr->umem->is_odp) {
			mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags);
		} else {
			__mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx,
					       npages, xlt,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages
			 * brought from the umem.
990 */ 991 memset(xlt + size_to_map, 0, size - size_to_map); 992 } 993 dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE); 994 995 sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); 996 997 if (pages_mapped + pages_iter >= pages_to_map) { 998 if (flags & MLX5_IB_UPD_XLT_ENABLE) 999 wr.wr.send_flags |= 1000 MLX5_IB_SEND_UMR_ENABLE_MR | 1001 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | 1002 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1003 if (flags & MLX5_IB_UPD_XLT_PD || 1004 flags & MLX5_IB_UPD_XLT_ACCESS) 1005 wr.wr.send_flags |= 1006 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1007 if (flags & MLX5_IB_UPD_XLT_ADDR) 1008 wr.wr.send_flags |= 1009 MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; 1010 } 1011 1012 wr.offset = idx * desc_size; 1013 wr.xlt_size = sg.length; 1014 1015 err = mlx5_ib_post_send_wait(dev, &wr); 1016 } 1017 dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE); 1018 1019 free_xlt: 1020 if (use_emergency_page) 1021 mlx5_ib_put_xlt_emergency_page(); 1022 else 1023 free_pages((unsigned long)xlt, get_order(size)); 1024 1025 return err; 1026 } 1027 1028 /* 1029 * If ibmr is NULL it will be allocated by reg_create. 1030 * Else, the given ibmr will be used. 1031 */ 1032 static struct mlx5_ib_mr *reg_create(struct ib_mr *ibmr, struct ib_pd *pd, 1033 u64 virt_addr, u64 length, 1034 struct ib_umem *umem, int npages, 1035 int page_shift, int access_flags, 1036 bool populate) 1037 { 1038 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1039 struct mlx5_ib_mr *mr; 1040 __be64 *pas; 1041 void *mkc; 1042 int inlen; 1043 u32 *in; 1044 int err; 1045 bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg)); 1046 1047 mr = ibmr ? to_mmr(ibmr) : kzalloc(sizeof(*mr), GFP_KERNEL); 1048 if (!mr) 1049 return ERR_PTR(-ENOMEM); 1050 1051 mr->ibmr.pd = pd; 1052 mr->access_flags = access_flags; 1053 1054 inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1055 if (populate) 1056 inlen += sizeof(*pas) * roundup(npages, 2); 1057 in = kvzalloc(inlen, GFP_KERNEL); 1058 if (!in) { 1059 err = -ENOMEM; 1060 goto err_1; 1061 } 1062 pas = (__be64 *)MLX5_ADDR_OF(create_mkey_in, in, klm_pas_mtt); 1063 if (populate && !(access_flags & IB_ACCESS_ON_DEMAND)) 1064 mlx5_ib_populate_pas(dev, umem, page_shift, pas, 1065 pg_cap ? MLX5_IB_MTT_PRESENT : 0); 1066 1067 /* The pg_access bit allows setting the access flags 1068 * in the page list submitted with the command. 
*/ 1069 MLX5_SET(create_mkey_in, in, pg_access, !!(pg_cap)); 1070 1071 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1072 MLX5_SET(mkc, mkc, free, !populate); 1073 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_MTT); 1074 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_write)) 1075 MLX5_SET(mkc, mkc, relaxed_ordering_write, 1076 !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); 1077 if (MLX5_CAP_GEN(dev->mdev, relaxed_ordering_read)) 1078 MLX5_SET(mkc, mkc, relaxed_ordering_read, 1079 !!(access_flags & IB_ACCESS_RELAXED_ORDERING)); 1080 MLX5_SET(mkc, mkc, a, !!(access_flags & IB_ACCESS_REMOTE_ATOMIC)); 1081 MLX5_SET(mkc, mkc, rw, !!(access_flags & IB_ACCESS_REMOTE_WRITE)); 1082 MLX5_SET(mkc, mkc, rr, !!(access_flags & IB_ACCESS_REMOTE_READ)); 1083 MLX5_SET(mkc, mkc, lw, !!(access_flags & IB_ACCESS_LOCAL_WRITE)); 1084 MLX5_SET(mkc, mkc, lr, 1); 1085 MLX5_SET(mkc, mkc, umr_en, 1); 1086 1087 MLX5_SET64(mkc, mkc, start_addr, virt_addr); 1088 MLX5_SET64(mkc, mkc, len, length); 1089 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1090 MLX5_SET(mkc, mkc, bsf_octword_size, 0); 1091 MLX5_SET(mkc, mkc, translations_octword_size, 1092 get_octo_len(virt_addr, length, page_shift)); 1093 MLX5_SET(mkc, mkc, log_page_size, page_shift); 1094 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1095 if (populate) { 1096 MLX5_SET(create_mkey_in, in, translations_octword_actual_size, 1097 get_octo_len(virt_addr, length, page_shift)); 1098 } 1099 1100 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); 1101 if (err) { 1102 mlx5_ib_warn(dev, "create mkey failed\n"); 1103 goto err_2; 1104 } 1105 mr->mmkey.type = MLX5_MKEY_MR; 1106 mr->desc_size = sizeof(struct mlx5_mtt); 1107 mr->dev = dev; 1108 kvfree(in); 1109 1110 mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmkey.key); 1111 1112 return mr; 1113 1114 err_2: 1115 kvfree(in); 1116 1117 err_1: 1118 if (!ibmr) 1119 kfree(mr); 1120 1121 return ERR_PTR(err); 1122 } 1123 1124 static void set_mr_fields(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr, 1125 int npages, u64 length, int access_flags) 1126 { 1127 mr->npages = npages; 1128 atomic_add(npages, &dev->mdev->priv.reg_pages); 1129 mr->ibmr.lkey = mr->mmkey.key; 1130 mr->ibmr.rkey = mr->mmkey.key; 1131 mr->ibmr.length = length; 1132 mr->access_flags = access_flags; 1133 } 1134 1135 static struct ib_mr *mlx5_ib_get_dm_mr(struct ib_pd *pd, u64 start_addr, 1136 u64 length, int acc, int mode) 1137 { 1138 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1139 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1140 struct mlx5_core_dev *mdev = dev->mdev; 1141 struct mlx5_ib_mr *mr; 1142 void *mkc; 1143 u32 *in; 1144 int err; 1145 1146 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1147 if (!mr) 1148 return ERR_PTR(-ENOMEM); 1149 1150 in = kzalloc(inlen, GFP_KERNEL); 1151 if (!in) { 1152 err = -ENOMEM; 1153 goto err_free; 1154 } 1155 1156 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1157 1158 MLX5_SET(mkc, mkc, access_mode_1_0, mode & 0x3); 1159 MLX5_SET(mkc, mkc, access_mode_4_2, (mode >> 2) & 0x7); 1160 MLX5_SET64(mkc, mkc, len, length); 1161 set_mkc_access_pd_addr_fields(mkc, acc, start_addr, pd); 1162 1163 err = mlx5_core_create_mkey(mdev, &mr->mmkey, in, inlen); 1164 if (err) 1165 goto err_in; 1166 1167 kfree(in); 1168 1169 mr->umem = NULL; 1170 set_mr_fields(dev, mr, 0, length, acc); 1171 1172 return &mr->ibmr; 1173 1174 err_in: 1175 kfree(in); 1176 1177 err_free: 1178 kfree(mr); 1179 1180 return ERR_PTR(err); 1181 } 1182 1183 int mlx5_ib_advise_mr(struct ib_pd *pd, 1184 enum ib_uverbs_advise_mr_advice advice, 
1185 u32 flags, 1186 struct ib_sge *sg_list, 1187 u32 num_sge, 1188 struct uverbs_attr_bundle *attrs) 1189 { 1190 if (advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH && 1191 advice != IB_UVERBS_ADVISE_MR_ADVICE_PREFETCH_WRITE) 1192 return -EOPNOTSUPP; 1193 1194 return mlx5_ib_advise_mr_prefetch(pd, advice, flags, 1195 sg_list, num_sge); 1196 } 1197 1198 struct ib_mr *mlx5_ib_reg_dm_mr(struct ib_pd *pd, struct ib_dm *dm, 1199 struct ib_dm_mr_attr *attr, 1200 struct uverbs_attr_bundle *attrs) 1201 { 1202 struct mlx5_ib_dm *mdm = to_mdm(dm); 1203 struct mlx5_core_dev *dev = to_mdev(dm->device)->mdev; 1204 u64 start_addr = mdm->dev_addr + attr->offset; 1205 int mode; 1206 1207 switch (mdm->type) { 1208 case MLX5_IB_UAPI_DM_TYPE_MEMIC: 1209 if (attr->access_flags & ~MLX5_IB_DM_MEMIC_ALLOWED_ACCESS) 1210 return ERR_PTR(-EINVAL); 1211 1212 mode = MLX5_MKC_ACCESS_MODE_MEMIC; 1213 start_addr -= pci_resource_start(dev->pdev, 0); 1214 break; 1215 case MLX5_IB_UAPI_DM_TYPE_STEERING_SW_ICM: 1216 case MLX5_IB_UAPI_DM_TYPE_HEADER_MODIFY_SW_ICM: 1217 if (attr->access_flags & ~MLX5_IB_DM_SW_ICM_ALLOWED_ACCESS) 1218 return ERR_PTR(-EINVAL); 1219 1220 mode = MLX5_MKC_ACCESS_MODE_SW_ICM; 1221 break; 1222 default: 1223 return ERR_PTR(-EINVAL); 1224 } 1225 1226 return mlx5_ib_get_dm_mr(pd, start_addr, attr->length, 1227 attr->access_flags, mode); 1228 } 1229 1230 struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, 1231 u64 virt_addr, int access_flags, 1232 struct ib_udata *udata) 1233 { 1234 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1235 struct mlx5_ib_mr *mr = NULL; 1236 bool use_umr; 1237 struct ib_umem *umem; 1238 int page_shift; 1239 int npages; 1240 int ncont; 1241 int order; 1242 int err; 1243 1244 if (!IS_ENABLED(CONFIG_INFINIBAND_USER_MEM)) 1245 return ERR_PTR(-EOPNOTSUPP); 1246 1247 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1248 start, virt_addr, length, access_flags); 1249 1250 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING) && !start && 1251 length == U64_MAX) { 1252 if (virt_addr != start) 1253 return ERR_PTR(-EINVAL); 1254 if (!(access_flags & IB_ACCESS_ON_DEMAND) || 1255 !(dev->odp_caps.general_caps & IB_ODP_SUPPORT_IMPLICIT)) 1256 return ERR_PTR(-EINVAL); 1257 1258 mr = mlx5_ib_alloc_implicit_mr(to_mpd(pd), udata, access_flags); 1259 if (IS_ERR(mr)) 1260 return ERR_CAST(mr); 1261 return &mr->ibmr; 1262 } 1263 1264 err = mr_umem_get(dev, start, length, access_flags, &umem, 1265 &npages, &page_shift, &ncont, &order); 1266 1267 if (err < 0) 1268 return ERR_PTR(err); 1269 1270 use_umr = mlx5_ib_can_use_umr(dev, true, access_flags); 1271 1272 if (order <= mr_cache_max_order(dev) && use_umr) { 1273 mr = alloc_mr_from_cache(pd, umem, virt_addr, length, ncont, 1274 page_shift, order, access_flags); 1275 if (PTR_ERR(mr) == -EAGAIN) { 1276 mlx5_ib_dbg(dev, "cache empty for order %d\n", order); 1277 mr = NULL; 1278 } 1279 } else if (!MLX5_CAP_GEN(dev->mdev, umr_extended_translation_offset)) { 1280 if (access_flags & IB_ACCESS_ON_DEMAND) { 1281 err = -EINVAL; 1282 pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB\n"); 1283 goto error; 1284 } 1285 use_umr = false; 1286 } 1287 1288 if (!mr) { 1289 mutex_lock(&dev->slow_path_mutex); 1290 mr = reg_create(NULL, pd, virt_addr, length, umem, ncont, 1291 page_shift, access_flags, !use_umr); 1292 mutex_unlock(&dev->slow_path_mutex); 1293 } 1294 1295 if (IS_ERR(mr)) { 1296 err = PTR_ERR(mr); 1297 goto error; 1298 } 1299 1300 mlx5_ib_dbg(dev, "mkey 0x%x\n", 
mr->mmkey.key); 1301 1302 mr->umem = umem; 1303 set_mr_fields(dev, mr, npages, length, access_flags); 1304 1305 if (use_umr) { 1306 int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; 1307 1308 if (access_flags & IB_ACCESS_ON_DEMAND) 1309 update_xlt_flags |= MLX5_IB_UPD_XLT_ZAP; 1310 1311 err = mlx5_ib_update_xlt(mr, 0, ncont, page_shift, 1312 update_xlt_flags); 1313 1314 if (err) { 1315 dereg_mr(dev, mr); 1316 return ERR_PTR(err); 1317 } 1318 } 1319 1320 if (is_odp_mr(mr)) { 1321 to_ib_umem_odp(mr->umem)->private = mr; 1322 atomic_set(&mr->num_deferred_work, 0); 1323 err = xa_err(xa_store(&dev->odp_mkeys, 1324 mlx5_base_mkey(mr->mmkey.key), &mr->mmkey, 1325 GFP_KERNEL)); 1326 if (err) { 1327 dereg_mr(dev, mr); 1328 return ERR_PTR(err); 1329 } 1330 } 1331 1332 return &mr->ibmr; 1333 error: 1334 ib_umem_release(umem); 1335 return ERR_PTR(err); 1336 } 1337 1338 /** 1339 * mlx5_mr_cache_invalidate - Fence all DMA on the MR 1340 * @mr: The MR to fence 1341 * 1342 * Upon return the NIC will not be doing any DMA to the pages under the MR, 1343 * and any DMA inprogress will be completed. Failure of this function 1344 * indicates the HW has failed catastrophically. 1345 */ 1346 int mlx5_mr_cache_invalidate(struct mlx5_ib_mr *mr) 1347 { 1348 struct mlx5_umr_wr umrwr = {}; 1349 1350 if (mr->dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) 1351 return 0; 1352 1353 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_DISABLE_MR | 1354 MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1355 umrwr.wr.opcode = MLX5_IB_WR_UMR; 1356 umrwr.pd = mr->dev->umrc.pd; 1357 umrwr.mkey = mr->mmkey.key; 1358 umrwr.ignore_free_state = 1; 1359 1360 return mlx5_ib_post_send_wait(mr->dev, &umrwr); 1361 } 1362 1363 static int rereg_umr(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1364 int access_flags, int flags) 1365 { 1366 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1367 struct mlx5_umr_wr umrwr = {}; 1368 int err; 1369 1370 umrwr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE; 1371 1372 umrwr.wr.opcode = MLX5_IB_WR_UMR; 1373 umrwr.mkey = mr->mmkey.key; 1374 1375 if (flags & IB_MR_REREG_PD || flags & IB_MR_REREG_ACCESS) { 1376 umrwr.pd = pd; 1377 umrwr.access_flags = access_flags; 1378 umrwr.wr.send_flags |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; 1379 } 1380 1381 err = mlx5_ib_post_send_wait(dev, &umrwr); 1382 1383 return err; 1384 } 1385 1386 int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, 1387 u64 length, u64 virt_addr, int new_access_flags, 1388 struct ib_pd *new_pd, struct ib_udata *udata) 1389 { 1390 struct mlx5_ib_dev *dev = to_mdev(ib_mr->device); 1391 struct mlx5_ib_mr *mr = to_mmr(ib_mr); 1392 struct ib_pd *pd = (flags & IB_MR_REREG_PD) ? new_pd : ib_mr->pd; 1393 int access_flags = flags & IB_MR_REREG_ACCESS ? 1394 new_access_flags : 1395 mr->access_flags; 1396 int page_shift = 0; 1397 int upd_flags = 0; 1398 int npages = 0; 1399 int ncont = 0; 1400 int order = 0; 1401 u64 addr, len; 1402 int err; 1403 1404 mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n", 1405 start, virt_addr, length, access_flags); 1406 1407 atomic_sub(mr->npages, &dev->mdev->priv.reg_pages); 1408 1409 if (!mr->umem) 1410 return -EINVAL; 1411 1412 if (is_odp_mr(mr)) 1413 return -EOPNOTSUPP; 1414 1415 if (flags & IB_MR_REREG_TRANS) { 1416 addr = virt_addr; 1417 len = length; 1418 } else { 1419 addr = mr->umem->address; 1420 len = mr->umem->length; 1421 } 1422 1423 if (flags != IB_MR_REREG_PD) { 1424 /* 1425 * Replace umem. This needs to be done whether or not UMR is 1426 * used. 
1427 */ 1428 flags |= IB_MR_REREG_TRANS; 1429 ib_umem_release(mr->umem); 1430 mr->umem = NULL; 1431 err = mr_umem_get(dev, addr, len, access_flags, &mr->umem, 1432 &npages, &page_shift, &ncont, &order); 1433 if (err) 1434 goto err; 1435 } 1436 1437 if (!mlx5_ib_can_use_umr(dev, true, access_flags) || 1438 (flags & IB_MR_REREG_TRANS && !use_umr_mtt_update(mr, addr, len))) { 1439 /* 1440 * UMR can't be used - MKey needs to be replaced. 1441 */ 1442 if (mr->allocated_from_cache) 1443 err = mlx5_mr_cache_invalidate(mr); 1444 else 1445 err = destroy_mkey(dev, mr); 1446 if (err) 1447 goto err; 1448 1449 mr = reg_create(ib_mr, pd, addr, len, mr->umem, ncont, 1450 page_shift, access_flags, true); 1451 1452 if (IS_ERR(mr)) { 1453 err = PTR_ERR(mr); 1454 mr = to_mmr(ib_mr); 1455 goto err; 1456 } 1457 1458 mr->allocated_from_cache = false; 1459 } else { 1460 /* 1461 * Send a UMR WQE 1462 */ 1463 mr->ibmr.pd = pd; 1464 mr->access_flags = access_flags; 1465 mr->mmkey.iova = addr; 1466 mr->mmkey.size = len; 1467 mr->mmkey.pd = to_mpd(pd)->pdn; 1468 1469 if (flags & IB_MR_REREG_TRANS) { 1470 upd_flags = MLX5_IB_UPD_XLT_ADDR; 1471 if (flags & IB_MR_REREG_PD) 1472 upd_flags |= MLX5_IB_UPD_XLT_PD; 1473 if (flags & IB_MR_REREG_ACCESS) 1474 upd_flags |= MLX5_IB_UPD_XLT_ACCESS; 1475 err = mlx5_ib_update_xlt(mr, 0, npages, page_shift, 1476 upd_flags); 1477 } else { 1478 err = rereg_umr(pd, mr, access_flags, flags); 1479 } 1480 1481 if (err) 1482 goto err; 1483 } 1484 1485 set_mr_fields(dev, mr, npages, len, access_flags); 1486 1487 return 0; 1488 1489 err: 1490 ib_umem_release(mr->umem); 1491 mr->umem = NULL; 1492 1493 clean_mr(dev, mr); 1494 return err; 1495 } 1496 1497 static int 1498 mlx5_alloc_priv_descs(struct ib_device *device, 1499 struct mlx5_ib_mr *mr, 1500 int ndescs, 1501 int desc_size) 1502 { 1503 int size = ndescs * desc_size; 1504 int add_size; 1505 int ret; 1506 1507 add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0); 1508 1509 mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL); 1510 if (!mr->descs_alloc) 1511 return -ENOMEM; 1512 1513 mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN); 1514 1515 mr->desc_map = dma_map_single(device->dev.parent, mr->descs, 1516 size, DMA_TO_DEVICE); 1517 if (dma_mapping_error(device->dev.parent, mr->desc_map)) { 1518 ret = -ENOMEM; 1519 goto err; 1520 } 1521 1522 return 0; 1523 err: 1524 kfree(mr->descs_alloc); 1525 1526 return ret; 1527 } 1528 1529 static void 1530 mlx5_free_priv_descs(struct mlx5_ib_mr *mr) 1531 { 1532 if (mr->descs) { 1533 struct ib_device *device = mr->ibmr.device; 1534 int size = mr->max_descs * mr->desc_size; 1535 1536 dma_unmap_single(device->dev.parent, mr->desc_map, 1537 size, DMA_TO_DEVICE); 1538 kfree(mr->descs_alloc); 1539 mr->descs = NULL; 1540 } 1541 } 1542 1543 static void clean_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1544 { 1545 int allocated_from_cache = mr->allocated_from_cache; 1546 1547 if (mr->sig) { 1548 if (mlx5_core_destroy_psv(dev->mdev, 1549 mr->sig->psv_memory.psv_idx)) 1550 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 1551 mr->sig->psv_memory.psv_idx); 1552 if (mlx5_core_destroy_psv(dev->mdev, 1553 mr->sig->psv_wire.psv_idx)) 1554 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 1555 mr->sig->psv_wire.psv_idx); 1556 xa_erase(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key)); 1557 kfree(mr->sig); 1558 mr->sig = NULL; 1559 } 1560 1561 if (!allocated_from_cache) { 1562 destroy_mkey(dev, mr); 1563 mlx5_free_priv_descs(mr); 1564 } 1565 } 1566 1567 static void dereg_mr(struct 
mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 1568 { 1569 int npages = mr->npages; 1570 struct ib_umem *umem = mr->umem; 1571 1572 /* Stop all DMA */ 1573 if (is_odp_mr(mr)) 1574 mlx5_ib_fence_odp_mr(mr); 1575 else 1576 clean_mr(dev, mr); 1577 1578 if (mr->allocated_from_cache) 1579 mlx5_mr_cache_free(dev, mr); 1580 else 1581 kfree(mr); 1582 1583 ib_umem_release(umem); 1584 atomic_sub(npages, &dev->mdev->priv.reg_pages); 1585 1586 } 1587 1588 int mlx5_ib_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata) 1589 { 1590 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 1591 1592 if (ibmr->type == IB_MR_TYPE_INTEGRITY) { 1593 dereg_mr(to_mdev(mmr->mtt_mr->ibmr.device), mmr->mtt_mr); 1594 dereg_mr(to_mdev(mmr->klm_mr->ibmr.device), mmr->klm_mr); 1595 } 1596 1597 if (is_odp_mr(mmr) && to_ib_umem_odp(mmr->umem)->is_implicit_odp) { 1598 mlx5_ib_free_implicit_mr(mmr); 1599 return 0; 1600 } 1601 1602 dereg_mr(to_mdev(ibmr->device), mmr); 1603 1604 return 0; 1605 } 1606 1607 static void mlx5_set_umr_free_mkey(struct ib_pd *pd, u32 *in, int ndescs, 1608 int access_mode, int page_shift) 1609 { 1610 void *mkc; 1611 1612 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1613 1614 MLX5_SET(mkc, mkc, free, 1); 1615 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1616 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1617 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 1618 MLX5_SET(mkc, mkc, access_mode_1_0, access_mode & 0x3); 1619 MLX5_SET(mkc, mkc, access_mode_4_2, (access_mode >> 2) & 0x7); 1620 MLX5_SET(mkc, mkc, umr_en, 1); 1621 MLX5_SET(mkc, mkc, log_page_size, page_shift); 1622 } 1623 1624 static int _mlx5_alloc_mkey_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1625 int ndescs, int desc_size, int page_shift, 1626 int access_mode, u32 *in, int inlen) 1627 { 1628 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1629 int err; 1630 1631 mr->access_mode = access_mode; 1632 mr->desc_size = desc_size; 1633 mr->max_descs = ndescs; 1634 1635 err = mlx5_alloc_priv_descs(pd->device, mr, ndescs, desc_size); 1636 if (err) 1637 return err; 1638 1639 mlx5_set_umr_free_mkey(pd, in, ndescs, access_mode, page_shift); 1640 1641 err = mlx5_core_create_mkey(dev->mdev, &mr->mmkey, in, inlen); 1642 if (err) 1643 goto err_free_descs; 1644 1645 mr->mmkey.type = MLX5_MKEY_MR; 1646 mr->ibmr.lkey = mr->mmkey.key; 1647 mr->ibmr.rkey = mr->mmkey.key; 1648 1649 return 0; 1650 1651 err_free_descs: 1652 mlx5_free_priv_descs(mr); 1653 return err; 1654 } 1655 1656 static struct mlx5_ib_mr *mlx5_ib_alloc_pi_mr(struct ib_pd *pd, 1657 u32 max_num_sg, u32 max_num_meta_sg, 1658 int desc_size, int access_mode) 1659 { 1660 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1661 int ndescs = ALIGN(max_num_sg + max_num_meta_sg, 4); 1662 int page_shift = 0; 1663 struct mlx5_ib_mr *mr; 1664 u32 *in; 1665 int err; 1666 1667 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1668 if (!mr) 1669 return ERR_PTR(-ENOMEM); 1670 1671 mr->ibmr.pd = pd; 1672 mr->ibmr.device = pd->device; 1673 1674 in = kzalloc(inlen, GFP_KERNEL); 1675 if (!in) { 1676 err = -ENOMEM; 1677 goto err_free; 1678 } 1679 1680 if (access_mode == MLX5_MKC_ACCESS_MODE_MTT) 1681 page_shift = PAGE_SHIFT; 1682 1683 err = _mlx5_alloc_mkey_descs(pd, mr, ndescs, desc_size, page_shift, 1684 access_mode, in, inlen); 1685 if (err) 1686 goto err_free_in; 1687 1688 mr->umem = NULL; 1689 kfree(in); 1690 1691 return mr; 1692 1693 err_free_in: 1694 kfree(in); 1695 err_free: 1696 kfree(mr); 1697 return ERR_PTR(err); 1698 } 1699 1700 static int mlx5_alloc_mem_reg_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1701 int ndescs, u32 
*in, int inlen) 1702 { 1703 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_mtt), 1704 PAGE_SHIFT, MLX5_MKC_ACCESS_MODE_MTT, in, 1705 inlen); 1706 } 1707 1708 static int mlx5_alloc_sg_gaps_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1709 int ndescs, u32 *in, int inlen) 1710 { 1711 return _mlx5_alloc_mkey_descs(pd, mr, ndescs, sizeof(struct mlx5_klm), 1712 0, MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 1713 } 1714 1715 static int mlx5_alloc_integrity_descs(struct ib_pd *pd, struct mlx5_ib_mr *mr, 1716 int max_num_sg, int max_num_meta_sg, 1717 u32 *in, int inlen) 1718 { 1719 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1720 u32 psv_index[2]; 1721 void *mkc; 1722 int err; 1723 1724 mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL); 1725 if (!mr->sig) 1726 return -ENOMEM; 1727 1728 /* create mem & wire PSVs */ 1729 err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn, 2, psv_index); 1730 if (err) 1731 goto err_free_sig; 1732 1733 mr->sig->psv_memory.psv_idx = psv_index[0]; 1734 mr->sig->psv_wire.psv_idx = psv_index[1]; 1735 1736 mr->sig->sig_status_checked = true; 1737 mr->sig->sig_err_exists = false; 1738 /* Next UMR, Arm SIGERR */ 1739 ++mr->sig->sigerr_count; 1740 mr->klm_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 1741 sizeof(struct mlx5_klm), 1742 MLX5_MKC_ACCESS_MODE_KLMS); 1743 if (IS_ERR(mr->klm_mr)) { 1744 err = PTR_ERR(mr->klm_mr); 1745 goto err_destroy_psv; 1746 } 1747 mr->mtt_mr = mlx5_ib_alloc_pi_mr(pd, max_num_sg, max_num_meta_sg, 1748 sizeof(struct mlx5_mtt), 1749 MLX5_MKC_ACCESS_MODE_MTT); 1750 if (IS_ERR(mr->mtt_mr)) { 1751 err = PTR_ERR(mr->mtt_mr); 1752 goto err_free_klm_mr; 1753 } 1754 1755 /* Set bsf descriptors for mkey */ 1756 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1757 MLX5_SET(mkc, mkc, bsf_en, 1); 1758 MLX5_SET(mkc, mkc, bsf_octword_size, MLX5_MKEY_BSF_OCTO_SIZE); 1759 1760 err = _mlx5_alloc_mkey_descs(pd, mr, 4, sizeof(struct mlx5_klm), 0, 1761 MLX5_MKC_ACCESS_MODE_KLMS, in, inlen); 1762 if (err) 1763 goto err_free_mtt_mr; 1764 1765 err = xa_err(xa_store(&dev->sig_mrs, mlx5_base_mkey(mr->mmkey.key), 1766 mr->sig, GFP_KERNEL)); 1767 if (err) 1768 goto err_free_descs; 1769 return 0; 1770 1771 err_free_descs: 1772 destroy_mkey(dev, mr); 1773 mlx5_free_priv_descs(mr); 1774 err_free_mtt_mr: 1775 dereg_mr(to_mdev(mr->mtt_mr->ibmr.device), mr->mtt_mr); 1776 mr->mtt_mr = NULL; 1777 err_free_klm_mr: 1778 dereg_mr(to_mdev(mr->klm_mr->ibmr.device), mr->klm_mr); 1779 mr->klm_mr = NULL; 1780 err_destroy_psv: 1781 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_memory.psv_idx)) 1782 mlx5_ib_warn(dev, "failed to destroy mem psv %d\n", 1783 mr->sig->psv_memory.psv_idx); 1784 if (mlx5_core_destroy_psv(dev->mdev, mr->sig->psv_wire.psv_idx)) 1785 mlx5_ib_warn(dev, "failed to destroy wire psv %d\n", 1786 mr->sig->psv_wire.psv_idx); 1787 err_free_sig: 1788 kfree(mr->sig); 1789 1790 return err; 1791 } 1792 1793 static struct ib_mr *__mlx5_ib_alloc_mr(struct ib_pd *pd, 1794 enum ib_mr_type mr_type, u32 max_num_sg, 1795 u32 max_num_meta_sg) 1796 { 1797 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1798 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1799 int ndescs = ALIGN(max_num_sg, 4); 1800 struct mlx5_ib_mr *mr; 1801 u32 *in; 1802 int err; 1803 1804 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1805 if (!mr) 1806 return ERR_PTR(-ENOMEM); 1807 1808 in = kzalloc(inlen, GFP_KERNEL); 1809 if (!in) { 1810 err = -ENOMEM; 1811 goto err_free; 1812 } 1813 1814 mr->ibmr.device = pd->device; 1815 mr->umem = NULL; 1816 1817 switch (mr_type) { 1818 
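	/*
	 * IB_MR_TYPE_MEM_REG uses MTT descriptors, IB_MR_TYPE_SG_GAPS uses
	 * KLM (indirect) descriptors, and IB_MR_TYPE_INTEGRITY additionally
	 * allocates the PI MRs and the memory/wire PSVs used for signature
	 * offload.
	 */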
case IB_MR_TYPE_MEM_REG: 1819 err = mlx5_alloc_mem_reg_descs(pd, mr, ndescs, in, inlen); 1820 break; 1821 case IB_MR_TYPE_SG_GAPS: 1822 err = mlx5_alloc_sg_gaps_descs(pd, mr, ndescs, in, inlen); 1823 break; 1824 case IB_MR_TYPE_INTEGRITY: 1825 err = mlx5_alloc_integrity_descs(pd, mr, max_num_sg, 1826 max_num_meta_sg, in, inlen); 1827 break; 1828 default: 1829 mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type); 1830 err = -EINVAL; 1831 } 1832 1833 if (err) 1834 goto err_free_in; 1835 1836 kfree(in); 1837 1838 return &mr->ibmr; 1839 1840 err_free_in: 1841 kfree(in); 1842 err_free: 1843 kfree(mr); 1844 return ERR_PTR(err); 1845 } 1846 1847 struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd, enum ib_mr_type mr_type, 1848 u32 max_num_sg, struct ib_udata *udata) 1849 { 1850 return __mlx5_ib_alloc_mr(pd, mr_type, max_num_sg, 0); 1851 } 1852 1853 struct ib_mr *mlx5_ib_alloc_mr_integrity(struct ib_pd *pd, 1854 u32 max_num_sg, u32 max_num_meta_sg) 1855 { 1856 return __mlx5_ib_alloc_mr(pd, IB_MR_TYPE_INTEGRITY, max_num_sg, 1857 max_num_meta_sg); 1858 } 1859 1860 struct ib_mw *mlx5_ib_alloc_mw(struct ib_pd *pd, enum ib_mw_type type, 1861 struct ib_udata *udata) 1862 { 1863 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1864 int inlen = MLX5_ST_SZ_BYTES(create_mkey_in); 1865 struct mlx5_ib_mw *mw = NULL; 1866 u32 *in = NULL; 1867 void *mkc; 1868 int ndescs; 1869 int err; 1870 struct mlx5_ib_alloc_mw req = {}; 1871 struct { 1872 __u32 comp_mask; 1873 __u32 response_length; 1874 } resp = {}; 1875 1876 err = ib_copy_from_udata(&req, udata, min(udata->inlen, sizeof(req))); 1877 if (err) 1878 return ERR_PTR(err); 1879 1880 if (req.comp_mask || req.reserved1 || req.reserved2) 1881 return ERR_PTR(-EOPNOTSUPP); 1882 1883 if (udata->inlen > sizeof(req) && 1884 !ib_is_udata_cleared(udata, sizeof(req), 1885 udata->inlen - sizeof(req))) 1886 return ERR_PTR(-EOPNOTSUPP); 1887 1888 ndescs = req.num_klms ? 
roundup(req.num_klms, 4) : roundup(1, 4); 1889 1890 mw = kzalloc(sizeof(*mw), GFP_KERNEL); 1891 in = kzalloc(inlen, GFP_KERNEL); 1892 if (!mw || !in) { 1893 err = -ENOMEM; 1894 goto free; 1895 } 1896 1897 mkc = MLX5_ADDR_OF(create_mkey_in, in, memory_key_mkey_entry); 1898 1899 MLX5_SET(mkc, mkc, free, 1); 1900 MLX5_SET(mkc, mkc, translations_octword_size, ndescs); 1901 MLX5_SET(mkc, mkc, pd, to_mpd(pd)->pdn); 1902 MLX5_SET(mkc, mkc, umr_en, 1); 1903 MLX5_SET(mkc, mkc, lr, 1); 1904 MLX5_SET(mkc, mkc, access_mode_1_0, MLX5_MKC_ACCESS_MODE_KLMS); 1905 MLX5_SET(mkc, mkc, en_rinval, !!((type == IB_MW_TYPE_2))); 1906 MLX5_SET(mkc, mkc, qpn, 0xffffff); 1907 1908 err = mlx5_core_create_mkey(dev->mdev, &mw->mmkey, in, inlen); 1909 if (err) 1910 goto free; 1911 1912 mw->mmkey.type = MLX5_MKEY_MW; 1913 mw->ibmw.rkey = mw->mmkey.key; 1914 mw->ndescs = ndescs; 1915 1916 resp.response_length = min(offsetof(typeof(resp), response_length) + 1917 sizeof(resp.response_length), udata->outlen); 1918 if (resp.response_length) { 1919 err = ib_copy_to_udata(udata, &resp, resp.response_length); 1920 if (err) { 1921 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); 1922 goto free; 1923 } 1924 } 1925 1926 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 1927 err = xa_err(xa_store(&dev->odp_mkeys, 1928 mlx5_base_mkey(mw->mmkey.key), &mw->mmkey, 1929 GFP_KERNEL)); 1930 if (err) 1931 goto free_mkey; 1932 } 1933 1934 kfree(in); 1935 return &mw->ibmw; 1936 1937 free_mkey: 1938 mlx5_core_destroy_mkey(dev->mdev, &mw->mmkey); 1939 free: 1940 kfree(mw); 1941 kfree(in); 1942 return ERR_PTR(err); 1943 } 1944 1945 int mlx5_ib_dealloc_mw(struct ib_mw *mw) 1946 { 1947 struct mlx5_ib_dev *dev = to_mdev(mw->device); 1948 struct mlx5_ib_mw *mmw = to_mmw(mw); 1949 int err; 1950 1951 if (IS_ENABLED(CONFIG_INFINIBAND_ON_DEMAND_PAGING)) { 1952 xa_erase(&dev->odp_mkeys, mlx5_base_mkey(mmw->mmkey.key)); 1953 /* 1954 * pagefault_single_data_segment() may be accessing mmw under 1955 * SRCU if the user bound an ODP MR to this MW. 
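		 * Waiting for the SRCU grace period below ensures the mkey is
		 * not destroyed while a page-fault handler may still be
		 * dereferencing it.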
1956 */ 1957 synchronize_srcu(&dev->odp_srcu); 1958 } 1959 1960 err = mlx5_core_destroy_mkey(dev->mdev, &mmw->mmkey); 1961 if (err) 1962 return err; 1963 kfree(mmw); 1964 return 0; 1965 } 1966 1967 int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask, 1968 struct ib_mr_status *mr_status) 1969 { 1970 struct mlx5_ib_mr *mmr = to_mmr(ibmr); 1971 int ret = 0; 1972 1973 if (check_mask & ~IB_MR_CHECK_SIG_STATUS) { 1974 pr_err("Invalid status check mask\n"); 1975 ret = -EINVAL; 1976 goto done; 1977 } 1978 1979 mr_status->fail_status = 0; 1980 if (check_mask & IB_MR_CHECK_SIG_STATUS) { 1981 if (!mmr->sig) { 1982 ret = -EINVAL; 1983 pr_err("signature status check requested on a non-signature enabled MR\n"); 1984 goto done; 1985 } 1986 1987 mmr->sig->sig_status_checked = true; 1988 if (!mmr->sig->sig_err_exists) 1989 goto done; 1990 1991 if (ibmr->lkey == mmr->sig->err_item.key) 1992 memcpy(&mr_status->sig_err, &mmr->sig->err_item, 1993 sizeof(mr_status->sig_err)); 1994 else { 1995 mr_status->sig_err.err_type = IB_SIG_BAD_GUARD; 1996 mr_status->sig_err.sig_err_offset = 0; 1997 mr_status->sig_err.key = mmr->sig->err_item.key; 1998 } 1999 2000 mmr->sig->sig_err_exists = false; 2001 mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS; 2002 } 2003 2004 done: 2005 return ret; 2006 } 2007 2008 static int 2009 mlx5_ib_map_pa_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg, 2010 int data_sg_nents, unsigned int *data_sg_offset, 2011 struct scatterlist *meta_sg, int meta_sg_nents, 2012 unsigned int *meta_sg_offset) 2013 { 2014 struct mlx5_ib_mr *mr = to_mmr(ibmr); 2015 unsigned int sg_offset = 0; 2016 int n = 0; 2017 2018 mr->meta_length = 0; 2019 if (data_sg_nents == 1) { 2020 n++; 2021 mr->ndescs = 1; 2022 if (data_sg_offset) 2023 sg_offset = *data_sg_offset; 2024 mr->data_length = sg_dma_len(data_sg) - sg_offset; 2025 mr->data_iova = sg_dma_address(data_sg) + sg_offset; 2026 if (meta_sg_nents == 1) { 2027 n++; 2028 mr->meta_ndescs = 1; 2029 if (meta_sg_offset) 2030 sg_offset = *meta_sg_offset; 2031 else 2032 sg_offset = 0; 2033 mr->meta_length = sg_dma_len(meta_sg) - sg_offset; 2034 mr->pi_iova = sg_dma_address(meta_sg) + sg_offset; 2035 } 2036 ibmr->length = mr->data_length + mr->meta_length; 2037 } 2038 2039 return n; 2040 } 2041 2042 static int 2043 mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr, 2044 struct scatterlist *sgl, 2045 unsigned short sg_nents, 2046 unsigned int *sg_offset_p, 2047 struct scatterlist *meta_sgl, 2048 unsigned short meta_sg_nents, 2049 unsigned int *meta_sg_offset_p) 2050 { 2051 struct scatterlist *sg = sgl; 2052 struct mlx5_klm *klms = mr->descs; 2053 unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0; 2054 u32 lkey = mr->ibmr.pd->local_dma_lkey; 2055 int i, j = 0; 2056 2057 mr->ibmr.iova = sg_dma_address(sg) + sg_offset; 2058 mr->ibmr.length = 0; 2059 2060 for_each_sg(sgl, sg, sg_nents, i) { 2061 if (unlikely(i >= mr->max_descs)) 2062 break; 2063 klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset); 2064 klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset); 2065 klms[i].key = cpu_to_be32(lkey); 2066 mr->ibmr.length += sg_dma_len(sg) - sg_offset; 2067 2068 sg_offset = 0; 2069 } 2070 2071 if (sg_offset_p) 2072 *sg_offset_p = sg_offset; 2073 2074 mr->ndescs = i; 2075 mr->data_length = mr->ibmr.length; 2076 2077 if (meta_sg_nents) { 2078 sg = meta_sgl; 2079 sg_offset = meta_sg_offset_p ? 
static int
mlx5_ib_sg_to_klms(struct mlx5_ib_mr *mr,
		   struct scatterlist *sgl,
		   unsigned short sg_nents,
		   unsigned int *sg_offset_p,
		   struct scatterlist *meta_sgl,
		   unsigned short meta_sg_nents,
		   unsigned int *meta_sg_offset_p)
{
	struct scatterlist *sg = sgl;
	struct mlx5_klm *klms = mr->descs;
	unsigned int sg_offset = sg_offset_p ? *sg_offset_p : 0;
	u32 lkey = mr->ibmr.pd->local_dma_lkey;
	int i, j = 0;

	mr->ibmr.iova = sg_dma_address(sg) + sg_offset;
	mr->ibmr.length = 0;

	for_each_sg(sgl, sg, sg_nents, i) {
		if (unlikely(i >= mr->max_descs))
			break;
		klms[i].va = cpu_to_be64(sg_dma_address(sg) + sg_offset);
		klms[i].bcount = cpu_to_be32(sg_dma_len(sg) - sg_offset);
		klms[i].key = cpu_to_be32(lkey);
		mr->ibmr.length += sg_dma_len(sg) - sg_offset;

		sg_offset = 0;
	}

	if (sg_offset_p)
		*sg_offset_p = sg_offset;

	mr->ndescs = i;
	mr->data_length = mr->ibmr.length;

	if (meta_sg_nents) {
		sg = meta_sgl;
		sg_offset = meta_sg_offset_p ? *meta_sg_offset_p : 0;
		for_each_sg(meta_sgl, sg, meta_sg_nents, j) {
			if (unlikely(i + j >= mr->max_descs))
				break;
			klms[i + j].va = cpu_to_be64(sg_dma_address(sg) +
						     sg_offset);
			klms[i + j].bcount = cpu_to_be32(sg_dma_len(sg) -
							 sg_offset);
			klms[i + j].key = cpu_to_be32(lkey);
			mr->ibmr.length += sg_dma_len(sg) - sg_offset;

			sg_offset = 0;
		}
		if (meta_sg_offset_p)
			*meta_sg_offset_p = sg_offset;

		mr->meta_ndescs = j;
		mr->meta_length = mr->ibmr.length - mr->data_length;
	}

	return i + j;
}

static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

static int mlx5_set_page_pi(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs + mr->meta_ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs + mr->meta_ndescs++] =
		cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}
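
/*
 * Try to describe both the data and the metadata SG lists with page (MTT)
 * descriptors of the internal mr->mtt_mr, using ib_sg_to_pages(). Data and
 * metadata share one MTT, which the HW handles more efficiently than the
 * KLM (indirect) layout; the caller falls back to KLMs if this fails.
 */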
static int
mlx5_ib_map_mtt_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = mr->mtt_mr;
	int n;

	pi_mr->ndescs = 0;
	pi_mr->meta_ndescs = 0;
	pi_mr->meta_length = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
				   pi_mr->desc_size * pi_mr->max_descs,
				   DMA_TO_DEVICE);

	pi_mr->ibmr.page_size = ibmr->page_size;
	n = ib_sg_to_pages(&pi_mr->ibmr, data_sg, data_sg_nents, data_sg_offset,
			   mlx5_set_page);
	if (n != data_sg_nents)
		return n;

	pi_mr->data_iova = pi_mr->ibmr.iova;
	pi_mr->data_length = pi_mr->ibmr.length;
	pi_mr->ibmr.length = pi_mr->data_length;
	ibmr->length = pi_mr->data_length;

	if (meta_sg_nents) {
		u64 page_mask = ~((u64)ibmr->page_size - 1);
		u64 iova = pi_mr->data_iova;

		n += ib_sg_to_pages(&pi_mr->ibmr, meta_sg, meta_sg_nents,
				    meta_sg_offset, mlx5_set_page_pi);

		pi_mr->meta_length = pi_mr->ibmr.length;
		/*
		 * PI address for the HW is the offset of the metadata address
		 * relative to the first data page address.
		 * It equals the first data page address + the size of the data
		 * pages + the metadata offset within the first metadata page.
		 */
		pi_mr->pi_iova = (iova & page_mask) +
				 pi_mr->ndescs * ibmr->page_size +
				 (pi_mr->ibmr.iova & ~page_mask);
		/*
		 * In order to use one MTT MR for data and metadata, we also
		 * register the gaps between the end of the data and the start
		 * of the metadata (the sig MR will verify that the HW accesses
		 * the right addresses). This mapping is safe because we use an
		 * internal mkey for the registration.
		 */
		pi_mr->ibmr.length = pi_mr->pi_iova + pi_mr->meta_length - iova;
		pi_mr->ibmr.iova = iova;
		ibmr->length += pi_mr->meta_length;
	}

	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
				      pi_mr->desc_size * pi_mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}

static int
mlx5_ib_map_klm_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = mr->klm_mr;
	int n;

	pi_mr->ndescs = 0;
	pi_mr->meta_ndescs = 0;
	pi_mr->meta_length = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, pi_mr->desc_map,
				   pi_mr->desc_size * pi_mr->max_descs,
				   DMA_TO_DEVICE);

	n = mlx5_ib_sg_to_klms(pi_mr, data_sg, data_sg_nents, data_sg_offset,
			       meta_sg, meta_sg_nents, meta_sg_offset);

	ib_dma_sync_single_for_device(ibmr->device, pi_mr->desc_map,
				      pi_mr->desc_size * pi_mr->max_descs,
				      DMA_TO_DEVICE);

	/* This is zero-based memory region */
	pi_mr->data_iova = 0;
	pi_mr->ibmr.iova = 0;
	pi_mr->pi_iova = pi_mr->data_length;
	ibmr->length = pi_mr->ibmr.length;

	return n;
}
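
/*
 * Map data and protection-information SG lists for an IB_MR_TYPE_INTEGRITY
 * MR. The cheapest descriptor layout that covers every SG entry wins: plain
 * PA descriptors with local_dma_lkey first, then the internal MTT MR, and
 * the internal KLM MR as a last resort. The chosen internal MR (or NULL for
 * the PA path) is recorded in mr->pi_mr.
 */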
int mlx5_ib_map_mr_sg_pi(struct ib_mr *ibmr, struct scatterlist *data_sg,
			 int data_sg_nents, unsigned int *data_sg_offset,
			 struct scatterlist *meta_sg, int meta_sg_nents,
			 unsigned int *meta_sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct mlx5_ib_mr *pi_mr = NULL;
	int n;

	WARN_ON(ibmr->type != IB_MR_TYPE_INTEGRITY);

	mr->ndescs = 0;
	mr->data_length = 0;
	mr->data_iova = 0;
	mr->meta_ndescs = 0;
	mr->pi_iova = 0;
	/*
	 * As a performance optimization, if possible, there is no need to
	 * perform a UMR operation to register the data/metadata buffers.
	 * First try to map the sg lists to PA descriptors with local_dma_lkey.
	 * Fall back to UMR only in case of a failure.
	 */
	n = mlx5_ib_map_pa_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				    data_sg_offset, meta_sg, meta_sg_nents,
				    meta_sg_offset);
	if (n == data_sg_nents + meta_sg_nents)
		goto out;
	/*
	 * As a performance optimization, if possible, there is no need to map
	 * the sg lists to KLM descriptors. First try to map the sg lists to MTT
	 * descriptors and fall back to KLM only in case of a failure.
	 * It's more efficient for the HW to work with MTT descriptors
	 * (especially under high load).
	 * Use KLM (indirect access) only if it's mandatory.
	 */
	pi_mr = mr->mtt_mr;
	n = mlx5_ib_map_mtt_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				     data_sg_offset, meta_sg, meta_sg_nents,
				     meta_sg_offset);
	if (n == data_sg_nents + meta_sg_nents)
		goto out;

	pi_mr = mr->klm_mr;
	n = mlx5_ib_map_klm_mr_sg_pi(ibmr, data_sg, data_sg_nents,
				     data_sg_offset, meta_sg, meta_sg_nents,
				     meta_sg_offset);
	if (unlikely(n != data_sg_nents + meta_sg_nents))
		return -ENOMEM;

out:
	/* This is zero-based memory region */
	ibmr->iova = 0;
	mr->pi_mr = pi_mr;
	if (pi_mr)
		ibmr->sig_attrs->meta_length = pi_mr->meta_length;
	else
		ibmr->sig_attrs->meta_length = mr->meta_length;

	return 0;
}

int mlx5_ib_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg, int sg_nents,
		      unsigned int *sg_offset)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	if (mr->access_mode == MLX5_MKC_ACCESS_MODE_KLMS)
		n = mlx5_ib_sg_to_klms(mr, sg, sg_nents, sg_offset, NULL, 0,
				       NULL);
	else
		n = ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
				   mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}