/*
 * Copyright (c) 2013, Mellanox Technologies inc. All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses. You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */


#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
	DEF_CACHE_SIZE	= 10,
};

static __be64 *mr_align(__be64 *ptr, int align)
{
	unsigned long mask = align - 1;

	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}

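/*
 * Pre-populate cache entry 'c' with 'num' MRs.  Each MR gets a
 * DMA-mapped page-list buffer (64-byte aligned within mr->pas) and a
 * free mkey (status bit 6 set, UMR enabled) so it can later be bound
 * to a user registration through a UMR work request.
 */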
static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct device *ddev = dev->ib_dev.dma_device;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int size = sizeof(u64) * npages;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			goto out;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->pas = kmalloc(size + 0x3f, GFP_KERNEL);
		if (!mr->pas) {
			kfree(mr);
			err = -ENOMEM;
			goto out;
		}
		mr->dma = dma_map_single(ddev, mr_align(mr->pas, 0x40), size,
					 DMA_TO_DEVICE);
		if (dma_mapping_error(ddev, mr->dma)) {
			kfree(mr->pas);
			kfree(mr);
			err = -ENOMEM;
			goto out;
		}

		in->seg.status = 1 << 6;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
					    sizeof(*in));
		if (err) {
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
			kfree(mr->pas);
			kfree(mr);
			goto out;
		}
		cache->last_add = jiffies;

		spin_lock(&ent->lock);
		list_add_tail(&mr->list, &ent->head);
		ent->cur++;
		ent->size++;
		spin_unlock(&ent->lock);
	}

out:
	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct device *ddev = dev->ib_dev.dma_device;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int size;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err) {
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		} else {
			size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
			dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
			kfree(mr->pas);
			kfree(mr);
		}
	}
}

static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, min(count, sizeof(lbuf) - 1)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[min(count, sizeof(lbuf) - 1)] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		err = add_keys(dev, c, var - ent->size);
		if (err)
			return err;
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, min(count, sizeof(lbuf) - 1)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[min(count, sizeof(lbuf) - 1)] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

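/*
 * Cache maintenance: the per-entry work functions below keep each
 * entry stocked with roughly 2 * limit MRs, and shrink an overfull
 * entry only when no entry is below its limit and nothing has been
 * added to the cache for 60 seconds.
 */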
static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit) {
		add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit)
			queue_work(cache->wq, &ent->work);
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 60 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 60 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}

static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock(&ent->lock);

		queue_work(cache->wq, &ent->work);

		if (mr)
			break;
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct device *ddev = dev->ib_dev.dma_device;
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int size;
	int err;

	while (1) {
		spin_lock(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err) {
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		} else {
			size = ALIGN(sizeof(u64) * (1 << mr->order), 0x40);
			dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
			kfree(mr->pas);
			kfree(mr);
		}
	}
}

static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int size;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE) {
			size = dev->mdev.profile->mr_cache[i].size;
			limit = dev->mdev.profile->mr_cache[i].limit;
		} else {
			size = DEF_CACHE_SIZE;
			limit = 0;
		}
		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	destroy_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	return 0;
}

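/*
 * Allocate the DMA MR: a physical-address-mode mkey
 * (MLX5_ACCESS_MODE_PA with MLX5_MKEY_LEN64) covering all of memory.
 */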
struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = &dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in));
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= 17;
}

static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.page_list_len = n;
	wr->wr.fast_reg.page_shift = page_shift;
	wr->wr.fast_reg.rkey = key;
	wr->wr.fast_reg.iova_start = virt_addr;
	wr->wr.fast_reg.length = len;
	wr->wr.fast_reg.access_flags = access_flags;
	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.rkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_mr *mr;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
		mr->status = wc.status;
		complete(&mr->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

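/*
 * Register a user memory region through the UMR path: take a free MR
 * from the cache (topping the cache up if needed), write the page
 * addresses into its pre-mapped buffer and post a UMR work request on
 * the driver's private QP.  Completion is reported by
 * mlx5_umr_cq_handler() through mr->done.
 */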
static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int err;
	int i;

	for (i = 0; i < 10; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err) {
			mlx5_ib_warn(dev, "add_keys failed\n");
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mlx5_ib_populate_pas(dev, umem, page_shift, mr_align(mr->pas, 0x40), 1);

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key, page_shift,
			 virt_addr, len, access_flags);

	/* We serialize polls so one process does not kidnap another's
	 * completion. This is not a problem since wr is completed in
	 * around 1 usec
	 */
	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		up(&umrc->sem);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);

	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "reg umr failed\n");
		err = -EFAULT;
		goto error;
	}

	return mr;

error:
	free_cached_mr(dev, mr);
	return ERR_PTR(err);
}

static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
							1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mlx5_vfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	mlx5_vfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

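/*
 * Register a user memory region.  Regions small enough for the UMR
 * path (see use_umr()) are served from the MR cache; larger regions,
 * or a cache miss, fall back to a firmware create_mkey command in
 * reg_create().
 */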
struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
		    start, virt_addr, length);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed\n");
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}

	mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key);

	mr->umem = umem;
	mr->npages = npages;
	spin_lock(&dev->mr_lock);
	dev->mdev.priv.reg_pages += npages;
	spin_unlock(&dev->mr_lock);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;

	return &mr->ibmr;

error:
	ib_umem_release(umem);
	return ERR_PTR(err);
}

static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	int err;

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_unreg_wqe(dev, &wr, mr->mmr.key);

	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		up(&umrc->sem);
		mlx5_ib_dbg(dev, "err %d\n", err);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);
	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "unreg umr failed\n");
		err = -EFAULT;
		goto error;
	}
	return 0;

error:
	return err;
}

int mlx5_ib_dereg_mr(struct ib_mr *ibmr)
{
	struct mlx5_ib_dev *dev = to_mdev(ibmr->device);
	struct mlx5_ib_mr *mr = to_mmr(ibmr);
	struct ib_umem *umem = mr->umem;
	int npages = mr->npages;
	int umred = mr->umred;
	int err;

	if (!umred) {
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err) {
			mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n",
				     mr->mmr.key, err);
			return err;
		}
	} else {
		err = unreg_umr(dev, mr);
		if (err) {
			mlx5_ib_warn(dev, "failed unregister\n");
			return err;
		}
		free_cached_mr(dev, mr);
	}

	if (umem) {
		ib_umem_release(umem);
		spin_lock(&dev->mr_lock);
		dev->mdev.priv.reg_pages -= npages;
		spin_unlock(&dev->mr_lock);
	}

	if (!umred)
		kfree(mr);

	return 0;
}

struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd,
					int max_page_list_len)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	in->seg.status = 1 << 6; /* free */
	in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2);
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	/*
	 * TBD not needed - issue 197292
	 */
	in->seg.log2_page_size = PAGE_SHIFT;

	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in));
	kfree(in);
	if (err)
		goto err_free;

	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_free:
	kfree(mr);
	return ERR_PTR(err);
}

struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}