/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static __be64 mlx5_ib_update_mtt_emergency_buffer[
		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
	__aligned(MLX5_UMR_ALIGN);
static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
#endif

static int clean_mr(struct mlx5_ib_mr *mr);

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

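/*
 * Completion handler for the asynchronous mkey creation requests issued
 * by add_keys().  On success the new mkey is added to the cache entry
 * matching its order and inserted into the device-wide mkey radix tree;
 * on failure the allocation is dropped and further cache filling is
 * delayed for about a second via dev->delay_timer.
 */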
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	write_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
				&mr->mmr);
	if (err)
		pr_err("Error inserting to mr tree. 0x%x\n", -err);
	write_unlock_irqrestore(&table->lock, flags);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = MLX5_MKEY_STATUS_FREE;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

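/*
 * debugfs write handlers for the per-order "size" and "limit" files.
 * Writing "size" grows or shrinks the cache entry to the requested
 * number of mkeys; writing "limit" sets the low-water mark that the
 * background work tries to keep the entry filled above.
 */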
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

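/*
 * Background maintenance for a single cache entry: keep at least
 * 2 * limit mkeys available, and lazily shrink the entry (one mkey at a
 * time) once it has been over-provisioned for a while and the CPU is
 * otherwise idle.
 */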
static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		/*
		 * The remove_keys() logic is performed as a garbage collection
		 * task.  Such a task is intended to run when no other active
		 * processes are running.
		 *
		 * need_resched() returns TRUE if there are user tasks to be
		 * activated in the near future.
		 *
		 * In such a case, we don't execute remove_keys() and postpone
		 * the garbage collection work to the next cycle, in order to
		 * free CPU resources to other tasks.
		 */
		if (!need_resched() && !someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

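/*
 * Expose the cache state under mr_cache/<order>/ in the device's debugfs
 * directory: "size" and "limit" are writable knobs (see the fops above),
 * while "cur" and "miss" are counters maintained by the cache code.
 */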
static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev->profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

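/*
 * Register a DMA MR that covers the whole address space.  It uses the
 * MLX5_ACCESS_MODE_PA access mode together with MLX5_MKEY_LEN64, so no
 * translation table is needed and the mkey is created synchronously.
 */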
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= MLX5_MAX_UMR_SHIFT;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = dev->umrc.pd->local_dma_lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;

	umrwr->npages = n;
	umrwr->page_shift = page_shift;
	umrwr->mkey = key;
	umrwr->target.virt_addr = virt_addr;
	umrwr->length = len;
	umrwr->access_flags = access_flags;
	umrwr->pd = pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	struct mlx5_umr_wr *umrwr = umr_wr(wr);

	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr->opcode = MLX5_IB_WR_UMR;
	umrwr->mkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_umr_context *context;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
		context->status = wc.status;
		complete(&context->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

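/*
 * Register a user MR through the UMR QP: take a pre-created mkey of a
 * matching order from the cache, build the 2k-aligned PAS (page address)
 * array, and post a UMR work request that points the cached mkey at the
 * new translation, length and access flags.
 */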
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr;
	struct ib_send_wr *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size;
	__be64 *mr_pas;
	__be64 *pas;
	dma_addr_t dma;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
	 * To avoid copying garbage after the pas array, we allocate
	 * a little more. */
	size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
	mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr_pas) {
		err = -ENOMEM;
		goto free_mr;
	}

	pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
	/* Clear padding after the actual pages. */
	memset(pas + npages, 0, size - npages * sizeof(u64));

	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		err = -ENOMEM;
		goto free_pas;
	}

	memset(&umrwr, 0, sizeof(umrwr));
	umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_reg_wqe(pd, &umrwr.wr, &sg, dma, npages, mr->mmr.key,
			 page_shift, virt_addr, len, access_flags);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed\n");
			err = -EFAULT;
		}
	}

	mr->mmr.iova = virt_addr;
	mr->mmr.size = len;
	mr->mmr.pd = to_mpd(pd)->pdn;

	mr->live = 1;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	kfree(mr_pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}

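/*
 * Update a window of MTT entries of an ODP MR in MLX5_UMR_MTT_ALIGNMENT
 * sized chunks, either repopulating them from the umem or zapping them.
 * The caller may be running in an invalidation context, hence the
 * GFP_ATOMIC allocation and the pre-allocated emergency buffer fallback.
 */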
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
		       int zap)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_umem *umem = mr->umem;
	int size;
	__be64 *pas;
	dma_addr_t dma;
	struct ib_send_wr *bad;
	struct mlx5_umr_wr wr;
	struct ib_sge sg;
	int err = 0;
	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
	const int page_index_mask = page_index_alignment - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	int use_emergency_buf = 0;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly */
	if (start_page_index & page_index_mask) {
		npages += start_page_index & page_index_mask;
		start_page_index &= ~page_index_mask;
	}

	pages_to_map = ALIGN(npages, page_index_alignment);

	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
		return -EINVAL;

	size = sizeof(u64) * pages_to_map;
	size = min_t(int, PAGE_SIZE, size);
	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
	 * code, when we are called from an invalidation. The pas buffer must
	 * be 2k-aligned for Connect-IB. */
	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
	if (!pas) {
		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
		pas = mlx5_ib_update_mtt_emergency_buffer;
		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
		use_emergency_buf = 1;
		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
		memset(pas, 0, size);
	}
	pages_iter = size / sizeof(u64);
	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
		err = -ENOMEM;
		goto free_pas;
	}

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, start_page_index += pages_iter) {
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);

		npages = min_t(size_t,
			       pages_iter,
			       ib_umem_num_pages(umem) - start_page_index);

		if (!zap) {
			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
					       start_page_index, npages, pas,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages brought from the
			 * umem. */
			memset(pas + npages, 0, size - npages * sizeof(u64));
		}

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		memset(&wr, 0, sizeof(wr));
		wr.wr.wr_id = (u64)(unsigned long)&umr_context;

		sg.addr = dma;
		sg.length = ALIGN(npages * sizeof(u64),
				  MLX5_UMR_MTT_ALIGNMENT);
		sg.lkey = dev->umrc.pd->local_dma_lkey;

		wr.wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
				   MLX5_IB_SEND_UMR_UPDATE_MTT;
		wr.wr.sg_list = &sg;
		wr.wr.num_sge = 1;
		wr.wr.opcode = MLX5_IB_WR_UMR;
		wr.npages = sg.length / sizeof(u64);
		wr.page_shift = PAGE_SHIFT;
		wr.mkey = mr->mmr.key;
		wr.target.offset = start_page_index;

		mlx5_ib_init_umr_context(&umr_context);
		down(&umrc->sem);
		err = ib_post_send(umrc->qp, &wr.wr, &bad);
		if (err) {
			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
		} else {
			wait_for_completion(&umr_context.done);
			if (umr_context.status != IB_WC_SUCCESS) {
				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
					    umr_context.status);
				err = -EFAULT;
			}
		}
		up(&umrc->sem);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	if (!use_emergency_buf)
		free_page((unsigned long)pas);
	else
		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);

	return err;
}
#endif

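/*
 * Slow-path registration used when the region is too large for the UMR
 * mechanism or when no cached mkey could be used: the mkey is created
 * with a firmware command whose inbox carries the full page list.
 */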
static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;
	bool pg_cap = !!(MLX5_CAP_GEN(dev->mdev, pg));

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
	 * in the page list submitted with the command. */
	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
							1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mr->dev = dev;
	mr->live = 1;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

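/*
 * ib_reg_user_mr() entry point: pin the user range, then prefer the
 * cached/UMR path for regions whose page-count order fits within
 * MLX5_MAX_UMR_SHIFT and fall back to reg_create() otherwise.  ODP
 * regions are only supported on the UMR path.
 */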
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
		err = -EINVAL;
		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
		goto error;
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem->odp_data) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR, before reg_umr has finished, to ensure that the
		 * MR initialization has finished before starting to
		 * handle invalidations.
		 */
		smp_wmb();
		mr->umem->odp_data->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines, before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line. Without the barrier,
		 * there can be a fault handling and an invalidation
		 * before umem->odp_data->private == mr is visible to
		 * the invalidation handler.
		 */
		smp_wmb();
	}
#endif

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct mlx5_umr_wr umrwr;
	struct ib_send_wr *bad;
	int err;

	memset(&umrwr.wr, 0, sizeof(umrwr));
	umrwr.wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_unreg_wqe(dev, &umrwr.wr, mr->mmr.key);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &umrwr.wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	} else {
		wait_for_completion(&umr_context.done);
		up(&umrc->sem);
	}
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

static int
mlx5_alloc_priv_descs(struct ib_device *device,
		      struct mlx5_ib_mr *mr,
		      int ndescs,
		      int desc_size)
{
	int size = ndescs * desc_size;
	int add_size;
	int ret;

	add_size = max_t(int, MLX5_UMR_ALIGN - ARCH_KMALLOC_MINALIGN, 0);

	mr->descs_alloc = kzalloc(size + add_size, GFP_KERNEL);
	if (!mr->descs_alloc)
		return -ENOMEM;

	mr->descs = PTR_ALIGN(mr->descs_alloc, MLX5_UMR_ALIGN);

	mr->desc_map = dma_map_single(device->dma_device, mr->descs,
				      size, DMA_TO_DEVICE);
	if (dma_mapping_error(device->dma_device, mr->desc_map)) {
		ret = -ENOMEM;
		goto err;
	}

	return 0;
err:
	kfree(mr->descs_alloc);

	return ret;
}

static void
mlx5_free_priv_descs(struct mlx5_ib_mr *mr)
{
	if (mr->descs) {
		struct ib_device *device = mr->ibmr.device;
		int size = mr->max_descs * mr->desc_size;

		dma_unmap_single(device->dma_device, mr->desc_map,
				 size, DMA_TO_DEVICE);
		kfree(mr->descs_alloc);
		mr->descs = NULL;
	}
}

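/*
 * Tear down the HW state of an MR.  Cache-born MRs ("umred") are
 * invalidated through a UMR work request and returned to the cache for
 * reuse; all other MRs destroy their mkey (and any signature PSVs) and
 * are freed here.
 */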
static int clean_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	int umred = mr->umred;
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
		mr->sig = NULL;
	}

	mlx5_free_priv_descs(mr);

	if (!umred) {
		err = destroy_mkey(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int npages = mr->npages;
	struct ib_umem *umem = mr->umem;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem && umem->odp_data) {
		/* Prevent new page faults from succeeding */
		mr->live = 0;
		/* Wait for all running page-fault handlers to finish. */
		synchronize_srcu(&dev->mr_srcu);
		/* Destroy all page mappings */
		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
					 ib_umem_end(umem));
		/*
		 * We kill the umem before the MR for ODP,
		 * so that there will not be any invalidations in
		 * flight, looking at the *mr struct.
		 */
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);

		/* Avoid double-freeing the umem. */
		umem = NULL;
	}
#endif

	clean_mr(mr);

	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	return 0;
}

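/*
 * Allocate an MR for fast registration.  IB_MR_TYPE_MEM_REG gets an
 * MTT-based mkey plus a private descriptor array for ib_map_mr_sg();
 * IB_MR_TYPE_SIGNATURE additionally creates the memory and wire PSVs
 * and switches the mkey to KLM access mode with BSF enabled.
 */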
struct ib_mr *mlx5_ib_alloc_mr(struct ib_pd *pd,
			       enum ib_mr_type mr_type,
			       u32 max_num_sg)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int access_mode, err;
	int ndescs = roundup(max_num_sg, 4);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = MLX5_MKEY_STATUS_FREE;
	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);

	if (mr_type == IB_MR_TYPE_MEM_REG) {
		access_mode = MLX5_ACCESS_MODE_MTT;
		in->seg.log2_page_size = PAGE_SHIFT;

		err = mlx5_alloc_priv_descs(pd->device, mr,
					    ndescs, sizeof(u64));
		if (err)
			goto err_free_in;

		mr->desc_size = sizeof(u64);
		mr->max_descs = ndescs;
	} else if (mr_type == IB_MR_TYPE_SIGNATURE) {
		u32 psv_index[2];

		in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
					       MLX5_MKEY_BSF_EN);
		in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		access_mode = MLX5_ACCESS_MODE_KLM;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	} else {
		mlx5_ib_warn(dev, "Invalid mr type %d\n", mr_type);
		err = -EINVAL;
		goto err_free_in;
	}

	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
				    NULL, NULL, NULL);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
	mlx5_free_priv_descs(mr);
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key)
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}

static int mlx5_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	__be64 *descs;

	if (unlikely(mr->ndescs == mr->max_descs))
		return -ENOMEM;

	descs = mr->descs;
	descs[mr->ndescs++] = cpu_to_be64(addr | MLX5_EN_RD | MLX5_EN_WR);

	return 0;
}

int mlx5_ib_map_mr_sg(struct ib_mr *ibmr,
		      struct scatterlist *sg,
		      int sg_nents)
{
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int n;

	mr->ndescs = 0;

	ib_dma_sync_single_for_cpu(ibmr->device, mr->desc_map,
				   mr->desc_size * mr->max_descs,
				   DMA_TO_DEVICE);

	n = ib_sg_to_pages(ibmr, sg, sg_nents, mlx5_set_page);

	ib_dma_sync_single_for_device(ibmr->device, mr->desc_map,
				      mr->desc_size * mr->max_descs,
				      DMA_TO_DEVICE);

	return n;
}