/*
 * Copyright (c) 2013, Mellanox Technologies inc.  All rights reserved.
 *
 * This software is available to you under a choice of one of two
 * licenses.  You may choose to be licensed under the terms of the GNU
 * General Public License (GPL) Version 2, available from the file
 * COPYING in the main directory of this source tree, or the
 * OpenIB.org BSD license below:
 *
 *     Redistribution and use in source and binary forms, with or
 *     without modification, are permitted provided that the following
 *     conditions are met:
 *
 *      - Redistributions of source code must retain the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer.
 *
 *      - Redistributions in binary form must reproduce the above
 *        copyright notice, this list of conditions and the following
 *        disclaimer in the documentation and/or other materials
 *        provided with the distribution.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include <linux/kref.h>
#include <linux/random.h>
#include <linux/debugfs.h>
#include <linux/export.h>
#include <linux/delay.h>
#include <rdma/ib_umem.h>
#include "mlx5_ib.h"

enum {
	MAX_PENDING_REG_MR = 8,
};

enum {
	MLX5_UMR_ALIGN = 2048
};

static __be64 *mr_align(__be64 *ptr, int align)
{
	unsigned long mask = align - 1;

	return (__be64 *)(((unsigned long)ptr + mask) & ~mask);
}

static int order2idx(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;

	if (order < cache->ent[0].order)
		return 0;
	else
		return order - cache->ent[0].order;
}
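
/*
 * Completion callback for the asynchronous mkey creation issued by
 * add_keys().  On failure the MR is freed and further cache filling is
 * throttled for about a second via delay_timer; on success the new key is
 * assembled and the MR is added to its cache bucket.
 */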
static void reg_mr_callback(int status, void *context)
{
	struct mlx5_ib_mr *mr = context;
	struct mlx5_ib_dev *dev = mr->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int c = order2idx(dev, mr->order);
	struct mlx5_cache_ent *ent = &cache->ent[c];
	u8 key;
	unsigned long flags;

	spin_lock_irqsave(&ent->lock, flags);
	ent->pending--;
	spin_unlock_irqrestore(&ent->lock, flags);
	if (status) {
		mlx5_ib_warn(dev, "async reg mr failed. status %d\n", status);
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	if (mr->out.hdr.status) {
		mlx5_ib_warn(dev, "failed - status %d, syndrome 0x%x\n",
			     mr->out.hdr.status,
			     be32_to_cpu(mr->out.hdr.syndrome));
		kfree(mr);
		dev->fill_delay = 1;
		mod_timer(&dev->delay_timer, jiffies + HZ);
		return;
	}

	spin_lock_irqsave(&dev->mdev.priv.mkey_lock, flags);
	key = dev->mdev.priv.mkey_key++;
	spin_unlock_irqrestore(&dev->mdev.priv.mkey_lock, flags);
	mr->mmr.key = mlx5_idx_to_mkey(be32_to_cpu(mr->out.mkey) & 0xffffff) | key;

	cache->last_add = jiffies;

	spin_lock_irqsave(&ent->lock, flags);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	ent->size++;
	spin_unlock_irqrestore(&ent->lock, flags);
}

static int add_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int npages = 1 << ent->order;
	int err = 0;
	int i;

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in)
		return -ENOMEM;

	for (i = 0; i < num; i++) {
		if (ent->pending >= MAX_PENDING_REG_MR) {
			err = -EAGAIN;
			break;
		}

		mr = kzalloc(sizeof(*mr), GFP_KERNEL);
		if (!mr) {
			err = -ENOMEM;
			break;
		}
		mr->order = ent->order;
		mr->umred = 1;
		mr->dev = dev;
		in->seg.status = 1 << 6;
		in->seg.xlt_oct_size = cpu_to_be32((npages + 1) / 2);
		in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
		in->seg.flags = MLX5_ACCESS_MODE_MTT | MLX5_PERM_UMR_EN;
		in->seg.log2_page_size = 12;

		spin_lock_irq(&ent->lock);
		ent->pending++;
		spin_unlock_irq(&ent->lock);
		mr->start = jiffies;
		err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in,
					    sizeof(*in), reg_mr_callback,
					    mr, &mr->out);
		if (err) {
			mlx5_ib_warn(dev, "create mkey failed %d\n", err);
			kfree(mr);
			break;
		}
	}

	kfree(in);
	return err;
}

static void remove_keys(struct mlx5_ib_dev *dev, int c, int num)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;
	int i;

	for (i = 0; i < num; i++) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}
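
/*
 * debugfs knobs for each cache bucket: writing "size" grows or shrinks the
 * bucket to the requested number of cached MRs, writing "limit" sets the
 * bucket's low-water mark (the worker keeps up to twice this many MRs
 * cached).  The matching read handlers report the current values.
 */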
static ssize_t size_write(struct file *filp, const char __user *buf,
			  size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var < ent->limit)
		return -EINVAL;

	if (var > ent->size) {
		do {
			err = add_keys(dev, c, var - ent->size);
			if (err && err != -EAGAIN)
				return err;

			usleep_range(3000, 5000);
		} while (err);
	} else if (var < ent->size) {
		remove_keys(dev, c, ent->size - var);
	}

	return count;
}

static ssize_t size_read(struct file *filp, char __user *buf, size_t count,
			 loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->size);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations size_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= size_write,
	.read	= size_read,
};

static ssize_t limit_write(struct file *filp, const char __user *buf,
			   size_t count, loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	struct mlx5_ib_dev *dev = ent->dev;
	char lbuf[20];
	u32 var;
	int err;
	int c;

	if (copy_from_user(lbuf, buf, sizeof(lbuf)))
		return -EFAULT;

	c = order2idx(dev, ent->order);
	lbuf[sizeof(lbuf) - 1] = 0;

	if (sscanf(lbuf, "%u", &var) != 1)
		return -EINVAL;

	if (var > ent->size)
		return -EINVAL;

	ent->limit = var;

	if (ent->cur < ent->limit) {
		err = add_keys(dev, c, 2 * ent->limit - ent->cur);
		if (err)
			return err;
	}

	return count;
}

static ssize_t limit_read(struct file *filp, char __user *buf, size_t count,
			  loff_t *pos)
{
	struct mlx5_cache_ent *ent = filp->private_data;
	char lbuf[20];
	int err;

	if (*pos)
		return 0;

	err = snprintf(lbuf, sizeof(lbuf), "%d\n", ent->limit);
	if (err < 0)
		return err;

	if (copy_to_user(buf, lbuf, err))
		return -EFAULT;

	*pos += err;

	return err;
}

static const struct file_operations limit_fops = {
	.owner	= THIS_MODULE,
	.open	= simple_open,
	.write	= limit_write,
	.read	= limit_read,
};

static int someone_adding(struct mlx5_mr_cache *cache)
{
	int i;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		if (cache->ent[i].cur < cache->ent[i].limit)
			return 1;
	}

	return 0;
}

static void __cache_work_func(struct mlx5_cache_ent *ent)
{
	struct mlx5_ib_dev *dev = ent->dev;
	struct mlx5_mr_cache *cache = &dev->cache;
	int i = order2idx(dev, ent->order);
	int err;

	if (cache->stopped)
		return;

	ent = &dev->cache.ent[i];
	if (ent->cur < 2 * ent->limit && !dev->fill_delay) {
		err = add_keys(dev, i, 1);
		if (ent->cur < 2 * ent->limit) {
			if (err == -EAGAIN) {
				mlx5_ib_dbg(dev, "returned eagain, order %d\n",
					    i + 2);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(3));
			} else if (err) {
				mlx5_ib_warn(dev, "command failed order %d, err %d\n",
					     i + 2, err);
				queue_delayed_work(cache->wq, &ent->dwork,
						   msecs_to_jiffies(1000));
			} else {
				queue_work(cache->wq, &ent->work);
			}
		}
	} else if (ent->cur > 2 * ent->limit) {
		if (!someone_adding(cache) &&
		    time_after(jiffies, cache->last_add + 300 * HZ)) {
			remove_keys(dev, i, 1);
			if (ent->cur > ent->limit)
				queue_work(cache->wq, &ent->work);
		} else {
			queue_delayed_work(cache->wq, &ent->dwork, 300 * HZ);
		}
	}
}

static void delayed_cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, dwork.work);
	__cache_work_func(ent);
}
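
/*
 * Immediate counterpart of delayed_cache_work_func(); both wrappers call
 * __cache_work_func() above.  Cache buckets are indexed by order with
 * ent->order = index + 2, so order2idx() maps, for example, an order-5
 * request to bucket 3.
 */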
static void cache_work_func(struct work_struct *work)
{
	struct mlx5_cache_ent *ent;

	ent = container_of(work, struct mlx5_cache_ent, work);
	__cache_work_func(ent);
}

static struct mlx5_ib_mr *alloc_cached_mr(struct mlx5_ib_dev *dev, int order)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_ib_mr *mr = NULL;
	struct mlx5_cache_ent *ent;
	int c;
	int i;

	c = order2idx(dev, order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", order, c);
		return NULL;
	}

	for (i = c; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];

		mlx5_ib_dbg(dev, "order %d, cache index %d\n", ent->order, i);

		spin_lock_irq(&ent->lock);
		if (!list_empty(&ent->head)) {
			mr = list_first_entry(&ent->head, struct mlx5_ib_mr,
					      list);
			list_del(&mr->list);
			ent->cur--;
			spin_unlock_irq(&ent->lock);
			if (ent->cur < ent->limit)
				queue_work(cache->wq, &ent->work);
			break;
		}
		spin_unlock_irq(&ent->lock);

		queue_work(cache->wq, &ent->work);

		if (mr)
			break;
	}

	if (!mr)
		cache->ent[c].miss++;

	return mr;
}

static void free_cached_mr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int shrink = 0;
	int c;

	c = order2idx(dev, mr->order);
	if (c < 0 || c >= MAX_MR_CACHE_ENTRIES) {
		mlx5_ib_warn(dev, "order %d, cache index %d\n", mr->order, c);
		return;
	}
	ent = &cache->ent[c];
	spin_lock_irq(&ent->lock);
	list_add_tail(&mr->list, &ent->head);
	ent->cur++;
	if (ent->cur > 2 * ent->limit)
		shrink = 1;
	spin_unlock_irq(&ent->lock);

	if (shrink)
		queue_work(cache->wq, &ent->work);
}

static void clean_keys(struct mlx5_ib_dev *dev, int c)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent = &cache->ent[c];
	struct mlx5_ib_mr *mr;
	int err;

	cancel_delayed_work(&ent->dwork);
	while (1) {
		spin_lock_irq(&ent->lock);
		if (list_empty(&ent->head)) {
			spin_unlock_irq(&ent->lock);
			return;
		}
		mr = list_first_entry(&ent->head, struct mlx5_ib_mr, list);
		list_del(&mr->list);
		ent->cur--;
		ent->size--;
		spin_unlock_irq(&ent->lock);
		err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr);
		if (err)
			mlx5_ib_warn(dev, "failed destroy mkey\n");
		else
			kfree(mr);
	}
}
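
/*
 * Expose the cache through debugfs: an "mr_cache" directory under the
 * device's debugfs root, with one subdirectory per bucket (named after its
 * order) holding the "size" and "limit" files defined above plus "cur" and
 * "miss" counters.
 */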
static int mlx5_mr_cache_debugfs_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int i;

	if (!mlx5_debugfs_root)
		return 0;

	cache->root = debugfs_create_dir("mr_cache", dev->mdev.priv.dbg_root);
	if (!cache->root)
		return -ENOMEM;

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		ent = &cache->ent[i];
		sprintf(ent->name, "%d", ent->order);
		ent->dir = debugfs_create_dir(ent->name, cache->root);
		if (!ent->dir)
			return -ENOMEM;

		ent->fsize = debugfs_create_file("size", 0600, ent->dir, ent,
						 &size_fops);
		if (!ent->fsize)
			return -ENOMEM;

		ent->flimit = debugfs_create_file("limit", 0600, ent->dir, ent,
						  &limit_fops);
		if (!ent->flimit)
			return -ENOMEM;

		ent->fcur = debugfs_create_u32("cur", 0400, ent->dir,
					       &ent->cur);
		if (!ent->fcur)
			return -ENOMEM;

		ent->fmiss = debugfs_create_u32("miss", 0600, ent->dir,
						&ent->miss);
		if (!ent->fmiss)
			return -ENOMEM;
	}

	return 0;
}

static void mlx5_mr_cache_debugfs_cleanup(struct mlx5_ib_dev *dev)
{
	if (!mlx5_debugfs_root)
		return;

	debugfs_remove_recursive(dev->cache.root);
}

static void delay_time_func(unsigned long ctx)
{
	struct mlx5_ib_dev *dev = (struct mlx5_ib_dev *)ctx;

	dev->fill_delay = 0;
}

int mlx5_mr_cache_init(struct mlx5_ib_dev *dev)
{
	struct mlx5_mr_cache *cache = &dev->cache;
	struct mlx5_cache_ent *ent;
	int limit;
	int err;
	int i;

	cache->wq = create_singlethread_workqueue("mkey_cache");
	if (!cache->wq) {
		mlx5_ib_warn(dev, "failed to create work queue\n");
		return -ENOMEM;
	}

	setup_timer(&dev->delay_timer, delay_time_func, (unsigned long)dev);
	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++) {
		INIT_LIST_HEAD(&cache->ent[i].head);
		spin_lock_init(&cache->ent[i].lock);

		ent = &cache->ent[i];
		INIT_LIST_HEAD(&ent->head);
		spin_lock_init(&ent->lock);
		ent->order = i + 2;
		ent->dev = dev;

		if (dev->mdev.profile->mask & MLX5_PROF_MASK_MR_CACHE)
			limit = dev->mdev.profile->mr_cache[i].limit;
		else
			limit = 0;

		INIT_WORK(&ent->work, cache_work_func);
		INIT_DELAYED_WORK(&ent->dwork, delayed_cache_work_func);
		ent->limit = limit;
		queue_work(cache->wq, &ent->work);
	}

	err = mlx5_mr_cache_debugfs_init(dev);
	if (err)
		mlx5_ib_warn(dev, "cache debugfs failure\n");

	return 0;
}

int mlx5_mr_cache_cleanup(struct mlx5_ib_dev *dev)
{
	int i;

	dev->cache.stopped = 1;
	flush_workqueue(dev->cache.wq);

	mlx5_mr_cache_debugfs_cleanup(dev);

	for (i = 0; i < MAX_MR_CACHE_ENTRIES; i++)
		clean_keys(dev, i);

	destroy_workqueue(dev->cache.wq);
	del_timer_sync(&dev->delay_timer);

	return 0;
}

struct ib_mr *mlx5_ib_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_core_dev *mdev = &dev->mdev;
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_mkey_seg *seg;
	struct mlx5_ib_mr *mr;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	in = kzalloc(sizeof(*in), GFP_KERNEL);
	if (!in) {
		err = -ENOMEM;
		goto err_free;
	}

	seg = &in->seg;
	seg->flags = convert_access(acc) | MLX5_ACCESS_MODE_PA;
	seg->flags_pd = cpu_to_be32(to_mpd(pd)->pdn | MLX5_MKEY_LEN64);
	seg->qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	seg->start_addr = 0;

	err = mlx5_core_create_mkey(mdev, &mr->mmr, in, sizeof(*in), NULL, NULL,
				    NULL);
	if (err)
		goto err_in;

	kfree(in);
	mr->ibmr.lkey = mr->mmr.key;
	mr->ibmr.rkey = mr->mmr.key;
	mr->umem = NULL;

	return &mr->ibmr;

err_in:
	kfree(in);

err_free:
	kfree(mr);

	return ERR_PTR(err);
}

static int get_octo_len(u64 addr, u64 len, int page_size)
{
	u64 offset;
	int npages;

	offset = addr & (page_size - 1);
	npages = ALIGN(len + offset, page_size) >> ilog2(page_size);
	return (npages + 1) / 2;
}

static int use_umr(int order)
{
	return order <= 17;
}
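
/*
 * Build the UMR work request used to register a memory region through the
 * reserved UMR QP: a single scatter entry points at the DMA-mapped array of
 * page addresses, while the translation parameters (page shift, iova,
 * length, access flags) are carried in the fast_reg fields of the WR.
 */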
static void prep_umr_reg_wqe(struct ib_pd *pd, struct ib_send_wr *wr,
			     struct ib_sge *sg, u64 dma, int n, u32 key,
			     int page_shift, u64 virt_addr, u64 len,
			     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct ib_mr *mr = dev->umrc.mr;

	sg->addr = dma;
	sg->length = ALIGN(sizeof(u64) * n, 64);
	sg->lkey = mr->lkey;

	wr->next = NULL;
	wr->send_flags = 0;
	wr->sg_list = sg;
	if (n)
		wr->num_sge = 1;
	else
		wr->num_sge = 0;

	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.page_list_len = n;
	wr->wr.fast_reg.page_shift = page_shift;
	wr->wr.fast_reg.rkey = key;
	wr->wr.fast_reg.iova_start = virt_addr;
	wr->wr.fast_reg.length = len;
	wr->wr.fast_reg.access_flags = access_flags;
	wr->wr.fast_reg.page_list = (struct ib_fast_reg_page_list *)pd;
}

static void prep_umr_unreg_wqe(struct mlx5_ib_dev *dev,
			       struct ib_send_wr *wr, u32 key)
{
	wr->send_flags = MLX5_IB_SEND_UMR_UNREG;
	wr->opcode = MLX5_IB_WR_UMR;
	wr->wr.fast_reg.rkey = key;
}

void mlx5_umr_cq_handler(struct ib_cq *cq, void *cq_context)
{
	struct mlx5_ib_mr *mr;
	struct ib_wc wc;
	int err;

	while (1) {
		err = ib_poll_cq(cq, 1, &wc);
		if (err < 0) {
			pr_warn("poll cq error %d\n", err);
			return;
		}
		if (err == 0)
			break;

		mr = (struct mlx5_ib_mr *)(unsigned long)wc.wr_id;
		mr->status = wc.status;
		complete(&mr->done);
	}
	ib_req_notify_cq(cq, IB_CQ_NEXT_COMP);
}

static struct mlx5_ib_mr *reg_umr(struct ib_pd *pd, struct ib_umem *umem,
				  u64 virt_addr, u64 len, int npages,
				  int page_shift, int order, int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct device *ddev = dev->ib_dev.dma_device;
	struct umr_common *umrc = &dev->umrc;
	struct ib_send_wr wr, *bad;
	struct mlx5_ib_mr *mr;
	struct ib_sge sg;
	int size = sizeof(u64) * npages;
	int err;
	int i;

	for (i = 0; i < 1; i++) {
		mr = alloc_cached_mr(dev, order);
		if (mr)
			break;

		err = add_keys(dev, order2idx(dev, order), 1);
		if (err && err != -EAGAIN) {
			mlx5_ib_warn(dev, "add_keys failed, err %d\n", err);
			break;
		}
	}

	if (!mr)
		return ERR_PTR(-EAGAIN);

	mr->pas = kmalloc(size + MLX5_UMR_ALIGN - 1, GFP_KERNEL);
	if (!mr->pas) {
		err = -ENOMEM;
		goto error;
	}

	mlx5_ib_populate_pas(dev, umem, page_shift,
			     mr_align(mr->pas, MLX5_UMR_ALIGN), 1);

	mr->dma = dma_map_single(ddev, mr_align(mr->pas, MLX5_UMR_ALIGN), size,
				 DMA_TO_DEVICE);
	if (dma_mapping_error(ddev, mr->dma)) {
		kfree(mr->pas);
		err = -ENOMEM;
		goto error;
	}

	memset(&wr, 0, sizeof(wr));
	wr.wr_id = (u64)(unsigned long)mr;
	prep_umr_reg_wqe(pd, &wr, &sg, mr->dma, npages, mr->mmr.key,
			 page_shift, virt_addr, len, access_flags);

	/* We serialize polls so one process does not kidnap another's
	 * completion.  This is not a problem since the wr is completed in
	 * around 1 usec.
	 */
	down(&umrc->sem);
	init_completion(&mr->done);
	err = ib_post_send(umrc->qp, &wr, &bad);
	if (err) {
		mlx5_ib_warn(dev, "post send failed, err %d\n", err);
		up(&umrc->sem);
		goto error;
	}
	wait_for_completion(&mr->done);
	up(&umrc->sem);

	dma_unmap_single(ddev, mr->dma, size, DMA_TO_DEVICE);
	kfree(mr->pas);

	if (mr->status != IB_WC_SUCCESS) {
		mlx5_ib_warn(dev, "reg umr failed\n");
		err = -EFAULT;
		goto error;
	}

	return mr;

error:
	free_cached_mr(dev, mr);
	return ERR_PTR(err);
}

static struct mlx5_ib_mr *reg_create(struct ib_pd *pd, u64 virt_addr,
				     u64 length, struct ib_umem *umem,
				     int npages, int page_shift,
				     int access_flags)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_create_mkey_mbox_in *in;
	struct mlx5_ib_mr *mr;
	int inlen;
	int err;

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr)
		return ERR_PTR(-ENOMEM);

	inlen = sizeof(*in) + sizeof(*in->pas) * ((npages + 1) / 2) * 2;
	in = mlx5_vzalloc(inlen);
	if (!in) {
		err = -ENOMEM;
		goto err_1;
	}
	mlx5_ib_populate_pas(dev, umem, page_shift, in->pas, 0);

	in->seg.flags = convert_access(access_flags) |
		MLX5_ACCESS_MODE_MTT;
	in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn);
	in->seg.start_addr = cpu_to_be64(virt_addr);
	in->seg.len = cpu_to_be64(length);
	in->seg.bsfs_octo_size = 0;
	in->seg.xlt_oct_size = cpu_to_be32(get_octo_len(virt_addr, length,
							1 << page_shift));
	in->seg.log2_page_size = page_shift;
	in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8);
	in->xlat_oct_act_size = cpu_to_be32(get_octo_len(virt_addr, length,
							 1 << page_shift));
	err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, inlen, NULL,
				    NULL, NULL);
	if (err) {
		mlx5_ib_warn(dev, "create mkey failed\n");
		goto err_2;
	}
	mr->umem = umem;
	mlx5_vfree(in);

	mlx5_ib_dbg(dev, "mkey = 0x%x\n", mr->mmr.key);

	return mr;

err_2:
	mlx5_vfree(in);

err_1:
	kfree(mr);

	return ERR_PTR(err);
}

struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
				  u64 virt_addr, int access_flags,
				  struct ib_udata *udata)
{
	struct mlx5_ib_dev *dev = to_mdev(pd->device);
	struct mlx5_ib_mr *mr = NULL;
	struct ib_umem *umem;
	int page_shift;
	int npages;
	int ncont;
	int order;
	int err;

	mlx5_ib_dbg(dev, "start 0x%llx, virt_addr 0x%llx, length 0x%llx\n",
		    start, virt_addr, length);
	umem = ib_umem_get(pd->uobject->context, start, length, access_flags,
			   0);
	if (IS_ERR(umem)) {
		mlx5_ib_dbg(dev, "umem get failed\n");
		return (void *)umem;
	}

	mlx5_ib_cont_pages(umem, start, &npages, &page_shift, &ncont, &order);
	if (!npages) {
		mlx5_ib_warn(dev, "avoid zero region\n");
		err = -EINVAL;
		goto error;
	}

	mlx5_ib_dbg(dev, "npages %d, ncont %d, order %d, page_shift %d\n",
		    npages, ncont, order, page_shift);

	if (use_umr(order)) {
		mr = reg_umr(pd, umem, virt_addr, length, ncont, page_shift,
			     order, access_flags);
		if (PTR_ERR(mr) == -EAGAIN) {
			mlx5_ib_dbg(dev, "cache empty for order %d", order);
			mr = NULL;
		}
	}

	if (!mr)
		mr = reg_create(pd, virt_addr, length, umem, ncont, page_shift,
				access_flags);

	if (IS_ERR(mr)) {
		err = PTR_ERR(mr);
		goto error;
	}
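
	/* Registration succeeded, either via a cached UMR-backed MR or a
	 * direct create_mkey command; publish the new key and account the
	 * pinned pages below.
	 */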
mlx5_ib_dbg(dev, "mkey 0x%x\n", mr->mmr.key); 911 912 mr->umem = umem; 913 mr->npages = npages; 914 spin_lock(&dev->mr_lock); 915 dev->mdev.priv.reg_pages += npages; 916 spin_unlock(&dev->mr_lock); 917 mr->ibmr.lkey = mr->mmr.key; 918 mr->ibmr.rkey = mr->mmr.key; 919 920 return &mr->ibmr; 921 922 error: 923 ib_umem_release(umem); 924 return ERR_PTR(err); 925 } 926 927 static int unreg_umr(struct mlx5_ib_dev *dev, struct mlx5_ib_mr *mr) 928 { 929 struct umr_common *umrc = &dev->umrc; 930 struct ib_send_wr wr, *bad; 931 int err; 932 933 memset(&wr, 0, sizeof(wr)); 934 wr.wr_id = (u64)(unsigned long)mr; 935 prep_umr_unreg_wqe(dev, &wr, mr->mmr.key); 936 937 down(&umrc->sem); 938 init_completion(&mr->done); 939 err = ib_post_send(umrc->qp, &wr, &bad); 940 if (err) { 941 up(&umrc->sem); 942 mlx5_ib_dbg(dev, "err %d\n", err); 943 goto error; 944 } 945 wait_for_completion(&mr->done); 946 up(&umrc->sem); 947 if (mr->status != IB_WC_SUCCESS) { 948 mlx5_ib_warn(dev, "unreg umr failed\n"); 949 err = -EFAULT; 950 goto error; 951 } 952 return 0; 953 954 error: 955 return err; 956 } 957 958 int mlx5_ib_dereg_mr(struct ib_mr *ibmr) 959 { 960 struct mlx5_ib_dev *dev = to_mdev(ibmr->device); 961 struct mlx5_ib_mr *mr = to_mmr(ibmr); 962 struct ib_umem *umem = mr->umem; 963 int npages = mr->npages; 964 int umred = mr->umred; 965 int err; 966 967 if (!umred) { 968 err = mlx5_core_destroy_mkey(&dev->mdev, &mr->mmr); 969 if (err) { 970 mlx5_ib_warn(dev, "failed to destroy mkey 0x%x (%d)\n", 971 mr->mmr.key, err); 972 return err; 973 } 974 } else { 975 err = unreg_umr(dev, mr); 976 if (err) { 977 mlx5_ib_warn(dev, "failed unregister\n"); 978 return err; 979 } 980 free_cached_mr(dev, mr); 981 } 982 983 if (umem) { 984 ib_umem_release(umem); 985 spin_lock(&dev->mr_lock); 986 dev->mdev.priv.reg_pages -= npages; 987 spin_unlock(&dev->mr_lock); 988 } 989 990 if (!umred) 991 kfree(mr); 992 993 return 0; 994 } 995 996 struct ib_mr *mlx5_ib_alloc_fast_reg_mr(struct ib_pd *pd, 997 int max_page_list_len) 998 { 999 struct mlx5_ib_dev *dev = to_mdev(pd->device); 1000 struct mlx5_create_mkey_mbox_in *in; 1001 struct mlx5_ib_mr *mr; 1002 int err; 1003 1004 mr = kzalloc(sizeof(*mr), GFP_KERNEL); 1005 if (!mr) 1006 return ERR_PTR(-ENOMEM); 1007 1008 in = kzalloc(sizeof(*in), GFP_KERNEL); 1009 if (!in) { 1010 err = -ENOMEM; 1011 goto err_free; 1012 } 1013 1014 in->seg.status = 1 << 6; /* free */ 1015 in->seg.xlt_oct_size = cpu_to_be32((max_page_list_len + 1) / 2); 1016 in->seg.qpn_mkey7_0 = cpu_to_be32(0xffffff << 8); 1017 in->seg.flags = MLX5_PERM_UMR_EN | MLX5_ACCESS_MODE_MTT; 1018 in->seg.flags_pd = cpu_to_be32(to_mpd(pd)->pdn); 1019 /* 1020 * TBD not needed - issue 197292 */ 1021 in->seg.log2_page_size = PAGE_SHIFT; 1022 1023 err = mlx5_core_create_mkey(&dev->mdev, &mr->mmr, in, sizeof(*in), NULL, 1024 NULL, NULL); 1025 kfree(in); 1026 if (err) 1027 goto err_free; 1028 1029 mr->ibmr.lkey = mr->mmr.key; 1030 mr->ibmr.rkey = mr->mmr.key; 1031 mr->umem = NULL; 1032 1033 return &mr->ibmr; 1034 1035 err_free: 1036 kfree(mr); 1037 return ERR_PTR(err); 1038 } 1039 1040 struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev, 1041 int page_list_len) 1042 { 1043 struct mlx5_ib_fast_reg_page_list *mfrpl; 1044 int size = page_list_len * sizeof(u64); 1045 1046 mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL); 1047 if (!mfrpl) 1048 return ERR_PTR(-ENOMEM); 1049 1050 mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL); 1051 if (!mfrpl->ibfrpl.page_list) 1052 goto err_free; 1053 1054 mfrpl->mapped_page_list = 
struct ib_fast_reg_page_list *mlx5_ib_alloc_fast_reg_page_list(struct ib_device *ibdev,
							       int page_list_len)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl;
	int size = page_list_len * sizeof(u64);

	mfrpl = kmalloc(sizeof(*mfrpl), GFP_KERNEL);
	if (!mfrpl)
		return ERR_PTR(-ENOMEM);

	mfrpl->ibfrpl.page_list = kmalloc(size, GFP_KERNEL);
	if (!mfrpl->ibfrpl.page_list)
		goto err_free;

	mfrpl->mapped_page_list = dma_alloc_coherent(ibdev->dma_device,
						     size, &mfrpl->map,
						     GFP_KERNEL);
	if (!mfrpl->mapped_page_list)
		goto err_free;

	WARN_ON(mfrpl->map & 0x3f);

	return &mfrpl->ibfrpl;

err_free:
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
	return ERR_PTR(-ENOMEM);
}

void mlx5_ib_free_fast_reg_page_list(struct ib_fast_reg_page_list *page_list)
{
	struct mlx5_ib_fast_reg_page_list *mfrpl = to_mfrpl(page_list);
	struct mlx5_ib_dev *dev = to_mdev(page_list->device);
	int size = page_list->max_page_list_len * sizeof(u64);

	dma_free_coherent(&dev->mdev.pdev->dev, size, mfrpl->mapped_page_list,
			  mfrpl->map);
	kfree(mfrpl->ibfrpl.page_list);
	kfree(mfrpl);
}