/*
 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
	DEF_CACHE_SIZE	= 10,
};

enum {
	MLX5_UMR_ALIGN	= 2048
};

static __be64 *mr_align(__be64 *ptr, int align)
{
	unsigned long mask = align - 1;

	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			goto out;
		}
		mr->order = ent->order;
		mr->umred = 1;
		in->seg.status = 1 << 6;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
					    sizeof(*in));
		if (err) {
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			goto out;
		}
		cache->last_add = jiffies;

		spin_lock(&ent->lock);
		list_add_tail(&mr->list, &ent->head);
		ent->cur++;
		ent->size++;
		spin_unlock(&ent->lock);
	}

out:
	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock(&ent->lock);
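		/* Stop early once this cache entry has no MRs left to reclaim. */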
		if (list_empty(&ent->head)) {
			spin_unlock(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		err = add_keys(dev, c, var - ent->size);
		if (err)
			return err;
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit) {
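		/* Running low: add one key and requeue until the entry is replenished. */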
		add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit)
			queue_work(cache->wq, &ent->work);
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 60 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock(&ent->lock);

		queue_work(cache->wq, &ent->work);

		if (mr)
			break;
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
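		/* One debugfs directory per cache entry, named after its page order. */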
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int size;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
			size = dev->mdev.profile->mr_cache[i].size;
			limit = dev->mdev.profile->mr_cache[i].limit;
		} else {
			size = DEF_CACHE_SIZE;
			limit = 0;
		}
		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = &dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
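	/* Each 16-byte octoword holds two 8-byte translation entries. */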
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= 17;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.page_list_len = n;
	wr->wr.fast_reg.page_shift = page_shift;
	wr->wr.fast_reg.rkey = key;
	wr->wr.fast_reg.iova_start = virt_addr;
	wr->wr.fast_reg.length = len;
	wr->wr.fast_reg.access_flags = access_flags;
	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.rkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_mr *mr;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
		mr->status = wc.status;
		complete(&mr->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size = sizeof(u64) * npages;
	int err;
	int i;

	for (i = 0; i < 10; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err) {
			mlx5_ib_warn(dev, "add_keys failed\n");
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr->pas) {
		err = -ENOMEM;
		goto error;
	}

	mlx5_ib_populate_pas(dev, umem, page_shift,
			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1);

	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
				 DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, mr->dma)) {
		kfree(mr->pas);
		err = -ENOMEM;
		goto error;
	}

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key,
			 page_shift, virt_addr, len, access_flags);

	/* We serialize polls so one process does not kidnap another's
	 * completion. This is not a problem since wr is completed in
	 * around 1 usec
	 */
	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		up(&umrc->sem);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);

	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
	kfree(mr->pas);

	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "reg umr failed\n");
		err = -EFAULT;
		goto error;
	}

	return mr;

error:
	free_cached_mr(dev, mr);
	return ERR_PTR(err);
}

static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
							1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mlx5_vfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	mlx5_vfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
		    start, virt_addr, length);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed\n");
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);
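	/* Record the umem and charge the pinned pages to the device-wide counter. */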
	mr->umem = umem;
	mr->npages = npages;
	spin_lock(&dev->mr_lock);
	dev->mdev.priv.reg_pages += npages;
	spin_unlock(&dev->mr_lock);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	int err;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);
	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int npages = mr->npages;
	int umred = mr->umred;
	int err;

	if (!umred) {
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (umem) {
		ib_umem_release(umem);
		spin_lock(&dev->mr_lock);
		dev->mdev.priv.reg_pages -= npages;
		spin_unlock(&dev->mr_lock);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = 1 << 6; /* free */
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/*
	 * TBD not needed - issue 197292
	 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
	kfree(in);
	if (err)
		goto err_free;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

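	/* The HW consumes this list directly; it must be 64-byte aligned
	 * (sanity-checked by the WARN_ON below).
	 */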
	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}