/*
 * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include <rdma/ib_umem_odp.h>
#include <rdma/ib_verbs.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

#define MLX5_UMR_ALIGN 2048
#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
static __be64 mlx5_ib_update_mtt_emergency_buffer[
		MLX5_UMR_MTT_MIN_CHUNK_SIZE/sizeof(__be64)]
	__aligned(MLX5_UMR_ALIGN);
static DEFINE_MUTEX(mlx5_ib_update_mtt_emergency_buffer_mutex);
#endif

static int clean_mr(struct mlx5_ib_mr *mr);

static int destroy_mkey(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	int err = mlx5_core_destroy_mkey(dev->mdev, &mr->mmr);

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	/* Wait until all page fault handlers using the mr complete. */
	synchronize_srcu(&dev->mr_srcu);
#endif

	return err;
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;
	struct mlx5_mr_table *table = &dev->mdev->priv.mr_table;
	int err;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev->priv.mkey_lock, flags);
	key = dev->mdev->priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev->priv.mkey_lock, flags);
	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);

	write_lock_irqsave(&table->lock, flags);
	err = radix_tree_insert(&table->tree, mlx5_base_mkey(mr->mmr.key),
				&mr->mmr);
	if (err)
		pr_err("Error inserting to mr tree. 0x%x\n", -err);
	write_unlock_irqrestore(&table->lock, flags);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = MLX5_MKEY_STATUS_FREE;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			spin_lock_irq(&ent->lock);
			ent->pending--;
			spin_unlock_irq(&ent->lock);
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}
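
/*
 * debugfs knobs for the MR cache: each cache entry exposes "size" and
 * "limit" files (created in mlx5_mr_cache_debugfs_init() below).  Writing
 * "size" grows or shrinks the entry via add_keys()/remove_keys(); writing
 * "limit" changes the per-entry low-water mark that the cache work uses to
 * decide when to add or trim mkeys.
 */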
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);

		if (mr)
			break;
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = destroy_mkey(dev, mr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev->priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev->profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev->profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}
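
/*
 * Registrations of up to 1 << MLX5_MAX_UMR_SHIFT pages take the UMR path
 * (reg_umr() below): a pre-created mkey is taken from the cache and its
 * translation table is filled by posting a UMR work request.  Larger
 * regions, or a miss when the cache cannot supply an mkey, fall back to
 * reg_create(), which builds the mkey with a blocking firmware command.
 */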
static int use_umr(int order)
{
	return order <= MLX5_MAX_UMR_SHIFT;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;
	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;

	umrwr->npages = n;
	umrwr->page_shift = page_shift;
	umrwr->mkey = key;
	umrwr->target.virt_addr = virt_addr;
	umrwr->length = len;
	umrwr->access_flags = access_flags;
	umrwr->pd = pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr->wr.fast_reg;

	wr->send_flags = MLX5_IB_SEND_UMR_UNREG | MLX5_IB_SEND_UMR_FAIL_IF_FREE;
	wr->opcode = MLX5_IB_WR_UMR;
	umrwr->mkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_umr_context *context;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		context = (struct mlx5_ib_umr_context *) (unsigned long) wc.wr_id;
		context->status = wc.status;
		complete(&context->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size;
	__be64 *mr_pas;
	__be64 *pas;
	dma_addr_t dma;
	int err = 0;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes.
	 * To avoid copying garbage after the pas array, we allocate
	 * a little more. */
	size = ALIGN(sizeof(u64) * npages, MLX5_UMR_MTT_ALIGNMENT);
	mr_pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr_pas) {
		err = -ENOMEM;
		goto free_mr;
	}

	pas = PTR_ALIGN(mr_pas, MLX5_UMR_ALIGN);
	mlx5_ib_populate_pas(dev, umem, page_shift, pas, MLX5_IB_MTT_PRESENT);
	/* Clear padding after the actual pages. */
	memset(pas + npages, 0, size - npages * sizeof(u64));

	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		err = -ENOMEM;
		goto free_pas;
	}

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_reg_wqe(pd, &wr, &sg, dma, npages, mr->mmr.key, page_shift,
			 virt_addr, len, access_flags);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		goto unmap_dma;
	} else {
		wait_for_completion(&umr_context.done);
		if (umr_context.status != IB_WC_SUCCESS) {
			mlx5_ib_warn(dev, "reg umr failed\n");
			err = -EFAULT;
		}
	}

	mr->mmr.iova = virt_addr;
	mr->mmr.size = len;
	mr->mmr.pd = to_mpd(pd)->pdn;

	mr->live = 1;

unmap_dma:
	up(&umrc->sem);
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	kfree(mr_pas);

free_mr:
	if (err) {
		free_cached_mr(dev, mr);
		return ERR_PTR(err);
	}

	return mr;
}

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
int mlx5_ib_update_mtt(struct mlx5_ib_mr *mr, u64 start_page_index, int npages,
		       int zap)
{
	struct mlx5_ib_dev *dev = mr->dev;
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_umem *umem = mr->umem;
	int size;
	__be64 *pas;
	dma_addr_t dma;
	struct ib_send_wr wr, *bad;
	struct mlx5_umr_wr *umrwr = (struct mlx5_umr_wr *)&wr.wr.fast_reg;
	struct ib_sge sg;
	int err = 0;
	const int page_index_alignment = MLX5_UMR_MTT_ALIGNMENT / sizeof(u64);
	const int page_index_mask = page_index_alignment - 1;
	size_t pages_mapped = 0;
	size_t pages_to_map = 0;
	size_t pages_iter = 0;
	int use_emergency_buf = 0;

	/* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes,
	 * so we need to align the offset and length accordingly */
	if (start_page_index & page_index_mask) {
		npages += start_page_index & page_index_mask;
		start_page_index &= ~page_index_mask;
	}

	pages_to_map = ALIGN(npages, page_index_alignment);

	if (start_page_index + pages_to_map > MLX5_MAX_UMR_PAGES)
		return -EINVAL;

	size = sizeof(u64) * pages_to_map;
	size = min_t(int, PAGE_SIZE, size);
	/* We allocate with GFP_ATOMIC to avoid recursion into page-reclaim
	 * code, when we are called from an invalidation. The pas buffer must
	 * be 2k-aligned for Connect-IB. */
	pas = (__be64 *)get_zeroed_page(GFP_ATOMIC);
	if (!pas) {
		mlx5_ib_warn(dev, "unable to allocate memory during MTT update, falling back to slower chunked mechanism.\n");
		pas = mlx5_ib_update_mtt_emergency_buffer;
		size = MLX5_UMR_MTT_MIN_CHUNK_SIZE;
		use_emergency_buf = 1;
		mutex_lock(&mlx5_ib_update_mtt_emergency_buffer_mutex);
		memset(pas, 0, size);
	}
	pages_iter = size / sizeof(u64);
	dma = dma_map_single(ddev, pas, size, DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, dma)) {
		mlx5_ib_err(dev, "unable to map DMA during MTT update.\n");
		err = -ENOMEM;
		goto free_pas;
	}

	for (pages_mapped = 0;
	     pages_mapped < pages_to_map && !err;
	     pages_mapped += pages_iter, start_page_index += pages_iter) {
		dma_sync_single_for_cpu(ddev, dma, size, DMA_TO_DEVICE);

		npages = min_t(size_t,
			       pages_iter,
			       ib_umem_num_pages(umem) - start_page_index);

		if (!zap) {
			__mlx5_ib_populate_pas(dev, umem, PAGE_SHIFT,
					       start_page_index, npages, pas,
					       MLX5_IB_MTT_PRESENT);
			/* Clear padding after the pages brought from the
			 * umem. */
			memset(pas + npages, 0, size - npages * sizeof(u64));
		}

		dma_sync_single_for_device(ddev, dma, size, DMA_TO_DEVICE);

		memset(&wr, 0, sizeof(wr));
		wr.wr_id = (u64)(unsigned long)&umr_context;

		sg.addr = dma;
		sg.length = ALIGN(npages * sizeof(u64),
				  MLX5_UMR_MTT_ALIGNMENT);
		sg.lkey = dev->umrc.mr->lkey;

		wr.send_flags = MLX5_IB_SEND_UMR_FAIL_IF_FREE |
				MLX5_IB_SEND_UMR_UPDATE_MTT;
		wr.sg_list = &sg;
		wr.num_sge = 1;
		wr.opcode = MLX5_IB_WR_UMR;
		umrwr->npages = sg.length / sizeof(u64);
		umrwr->page_shift = PAGE_SHIFT;
		umrwr->mkey = mr->mmr.key;
		umrwr->target.offset = start_page_index;

		mlx5_ib_init_umr_context(&umr_context);
		down(&umrc->sem);
		err = ib_post_send(umrc->qp, &wr, &bad);
		if (err) {
			mlx5_ib_err(dev, "UMR post send failed, err %d\n", err);
		} else {
			wait_for_completion(&umr_context.done);
			if (umr_context.status != IB_WC_SUCCESS) {
				mlx5_ib_err(dev, "UMR completion failed, code %d\n",
					    umr_context.status);
				err = -EFAULT;
			}
		}
		up(&umrc->sem);
	}
	dma_unmap_single(ddev, dma, size, DMA_TO_DEVICE);

free_pas:
	if (!use_emergency_buf)
		free_page((unsigned long)pas);
	else
		mutex_unlock(&mlx5_ib_update_mtt_emergency_buffer_mutex);

	return err;
}
#endif

static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;
	bool pg_cap = !!(dev->mdev->caps.gen.flags &
			 MLX5_DEV_CAP_FLAG_ON_DMND_PG);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas,
			     pg_cap ? MLX5_IB_MTT_PRESENT : 0);

	/* The MLX5_MKEY_INBOX_PG_ACCESS bit allows setting the access flags
	 * in the page list submitted with the command. */
	in->flags = pg_cap ? cpu_to_be32(MLX5_MKEY_INBOX_PG_ACCESS) : 0;
	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length, 1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mr->dev = dev;
	mr->live = 1;
	kvfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	kvfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx, access_flags 0x%x\n",
		    start, virt_addr, length, access_flags);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed (%ld)\n", PTR_ERR(umem));
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	} else if (access_flags & IB_ACCESS_ON_DEMAND) {
		err = -EINVAL;
		pr_err("Got MR registration for ODP MR > 512MB, not supported for Connect-IB");
		goto error;
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	atomic_add(npages, &dev->mdev->priv.reg_pages);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem->odp_data) {
		/*
		 * This barrier prevents the compiler from moving the
		 * setting of umem->odp_data->private to point to our
		 * MR, before reg_umr finished, to ensure that the MR
		 * initialization have finished before starting to
		 * handle invalidations.
		 */
		smp_wmb();
		mr->umem->odp_data->private = mr;
		/*
		 * Make sure we will see the new
		 * umem->odp_data->private value in the invalidation
		 * routines, before we can get page faults on the
		 * MR. Page faults can happen once we put the MR in
		 * the tree, below this line.
		 * Without the barrier, there can be a fault handling and
		 * an invalidation before umem->odp_data->private == mr is
		 * visible to the invalidation handler.
		 */
		smp_wmb();
	}
#endif

	return &mr->ibmr;

error:
	/*
	 * Destroy the umem *before* destroying the MR, to ensure we
	 * will not have any in-flight notifiers when destroying the
	 * MR.
	 *
	 * As the MR is completely invalid to begin with, and this
	 * error path is only taken if we can't push the mr entry into
	 * the pagefault tree, this is safe.
	 */

	ib_umem_release(umem);
	/* Kill the MR, and return an error code. */
	clean_mr(mr);
	return ERR_PTR(err);
}

static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct mlx5_ib_umr_context umr_context;
	struct ib_send_wr wr, *bad;
	int err;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)&umr_context;
	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

	mlx5_ib_init_umr_context(&umr_context);
	down(&umrc->sem);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	} else {
		wait_for_completion(&umr_context.done);
		up(&umrc->sem);
	}
	if (umr_context.status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

static int clean_mr(struct mlx5_ib_mr *mr)
{
	struct mlx5_ib_dev *dev = to_mdev(mr->ibmr.device);
	int umred = mr->umred;
	int err;

	if (!umred) {
		err = destroy_mkey(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int npages = mr->npages;
	struct ib_umem *umem = mr->umem;

#ifdef CONFIG_INFINIBAND_ON_DEMAND_PAGING
	if (umem && umem->odp_data) {
		/* Prevent new page faults from succeeding */
		mr->live = 0;
		/* Wait for all running page-fault handlers to finish. */
		synchronize_srcu(&dev->mr_srcu);
		/* Destroy all page mappings */
		mlx5_ib_invalidate_range(umem, ib_umem_start(umem),
					 ib_umem_end(umem));
		/*
		 * We kill the umem before the MR for ODP,
		 * so that there will not be any invalidations in
		 * flight, looking at the *mr struct.
		 */
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);

		/* Avoid double-freeing the umem. */
		umem = NULL;
	}
#endif

	clean_mr(mr);

	if (umem) {
		ib_umem_release(umem);
		atomic_sub(npages, &dev->mdev->priv.reg_pages);
	}

	return 0;
}

struct ib_mr *mlx5_ib_create_mr(struct ib_pd *pd,
				struct ib_mr_init_attr *mr_init_attr)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int access_mode, err;
	int ndescs = roundup(mr_init_attr->max_reg_descriptors, 4);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = MLX5_MKEY_STATUS_FREE;
	in->seg.xlt_oct_size = cpu_to_be32(ndescs);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	access_mode = MLX5_ACCESS_MODE_MTT;

	if (mr_init_attr->flags & IB_MR_SIGNATURE_EN) {
		u32 psv_index[2];

		in->seg.flags_pd = cpu_to_be32(be32_to_cpu(in->seg.flags_pd) |
					       MLX5_MKEY_BSF_EN);
		in->seg.bsfs_octo_size = cpu_to_be32(MLX5_MKEY_BSF_OCTO_SIZE);
		mr->sig = kzalloc(sizeof(*mr->sig), GFP_KERNEL);
		if (!mr->sig) {
			err = -ENOMEM;
			goto err_free_in;
		}

		/* create mem & wire PSVs */
		err = mlx5_core_create_psv(dev->mdev, to_mpd(pd)->pdn,
					   2, psv_index);
		if (err)
			goto err_free_sig;

		access_mode = MLX5_ACCESS_MODE_KLM;
		mr->sig->psv_memory.psv_idx = psv_index[0];
		mr->sig->psv_wire.psv_idx = psv_index[1];

		mr->sig->sig_status_checked = true;
		mr->sig->sig_err_exists = false;
		/* Next UMR, Arm SIGERR */
		++mr->sig->sigerr_count;
	}

	in->seg.flags = MLX5_PERM_UMR_EN | access_mode;
	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in),
				    NULL, NULL, NULL);
	if (err)
		goto err_destroy_psv;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;
	kfree(in);

	return &mr->ibmr;

err_destroy_psv:
	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
	}
err_free_sig:
	kfree(mr->sig);
err_free_in:
	kfree(in);
err_free:
	kfree(mr);
	return ERR_PTR(err);
}

int mlx5_ib_destroy_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	int err;

	if (mr->sig) {
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_memory.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy mem psv %d\n",
				     mr->sig->psv_memory.psv_idx);
		if (mlx5_core_destroy_psv(dev->mdev,
					  mr->sig->psv_wire.psv_idx))
			mlx5_ib_warn(dev, "failed to destroy wire psv %d\n",
				     mr->sig->psv_wire.psv_idx);
		kfree(mr->sig);
	}

	err = destroy_mkey(dev, mr);
	if (err) {
		mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
			     mr->mmr.key, err);
		return err;
	}

	kfree(mr);

	return err;
}
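
/*
 * Fast-register support: the mkey allocated here carries no pages
 * (mr->umem stays NULL).  A consumer later maps pages into it by posting
 * a fast-register work request that references a page list obtained from
 * mlx5_ib_alloc_fast_reg_page_list() below.
 */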
struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = MLX5_MKEY_STATUS_FREE;
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/* TBD not needed - issue 197292 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(dev->mdev, &mr->mmr, in, sizeof(*in), NULL,
				    NULL, NULL);
	kfree(in);
	if (err)
		goto err_free;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev->pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}

int mlx5_ib_check_mr_status(struct ib_mr *ibmr, u32 check_mask,
			    struct ib_mr_status *mr_status)
{
	struct mlx5_ib_mr *mmr = to_mmr(ibmr);
	int ret = 0;

	if (check_mask & ~IB_MR_CHECK_SIG_STATUS) {
		pr_err("Invalid status check mask\n");
		ret = -EINVAL;
		goto done;
	}

	mr_status->fail_status = 0;
	if (check_mask & IB_MR_CHECK_SIG_STATUS) {
		if (!mmr->sig) {
			ret = -EINVAL;
			pr_err("signature status check requested on a non-signature enabled MR\n");
			goto done;
		}

		mmr->sig->sig_status_checked = true;
		if (!mmr->sig->sig_err_exists)
			goto done;

		if (ibmr->lkey == mmr->sig->err_item.key)
			memcpy(&mr_status->sig_err, &mmr->sig->err_item,
			       sizeof(mr_status->sig_err));
		else {
			mr_status->sig_err.err_type = IB_SIG_BAD_GUARD;
			mr_status->sig_err.sig_err_offset = 0;
			mr_status->sig_err.key = mmr->sig->err_item.key;
		}

		mmr->sig->sig_err_exists = false;
		mr_status->fail_status |= IB_MR_CHECK_SIG_STATUS;
	}

done:
	return ret;
}