/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"
#include "trace.h"

/**
 * rvt_driver_mr_init - Init MR resources per driver
 * @rdi: rvt dev struct
 *
 * Do any initialization needed when a driver registers with rdmavt.
 *
 * Return: 0 on success or errno on failure
 */
int rvt_driver_mr_init(struct rvt_dev_info *rdi)
{
	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
	unsigned lk_tab_size;
	int i;

	/*
	 * The top hfi1_lkey_table_size bits are used to index the
	 * table. The lower 8 bits can be owned by the user (copied from
	 * the LKEY). The remaining bits act as a generation number or tag.
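	 *
	 * For example (illustration only), with lkey_table_size == 16 the
	 * table below holds 1 << 16 entries, lkey_table.shift is
	 * 32 - 16 = 16, and an LKEY's table index is recovered later as
	 * (lkey >> shift).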
	 */
	if (!lkey_table_size)
		return -EINVAL;

	spin_lock_init(&rdi->lkey_table.lock);

	/* ensure generation is at least 4 bits */
	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
		lkey_table_size = rdi->dparms.lkey_table_size;
	}
	rdi->lkey_table.max = 1 << lkey_table_size;
	rdi->lkey_table.shift = 32 - lkey_table_size;
	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
			       vmalloc_node(lk_tab_size, rdi->dparms.node);
	if (!rdi->lkey_table.table)
		return -ENOMEM;

	RCU_INIT_POINTER(rdi->dma_mr, NULL);
	for (i = 0; i < rdi->lkey_table.max; i++)
		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);

	return 0;
}

/**
 * rvt_mr_exit - clean up MR
 * @rdi: rvt dev structure
 *
 * Called when drivers have unregistered or perhaps failed to register with us
 */
void rvt_mr_exit(struct rvt_dev_info *rdi)
{
	if (rdi->dma_mr)
		rvt_pr_err(rdi, "DMA MR not null!\n");

	vfree(rdi->lkey_table.table);
}

static void rvt_deinit_mregion(struct rvt_mregion *mr)
{
	int i = mr->mapsz;

	mr->mapsz = 0;
	while (i)
		kfree(mr->map[--i]);
	percpu_ref_exit(&mr->refcount);
}

static void __rvt_mregion_complete(struct percpu_ref *ref)
{
	struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
					      refcount);

	complete(&mr->comp);
}

static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
			    int count, unsigned int percpu_flags)
{
	int m, i = 0;
	struct rvt_dev_info *dev = ib_to_rvt(pd->device);

	mr->mapsz = 0;
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	for (; i < m; i++) {
		mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
					  dev->dparms.node);
		if (!mr->map[i])
			goto bail;
		mr->mapsz++;
	}
	init_completion(&mr->comp);
	/* count returning the ptr to user */
	if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
			    percpu_flags, GFP_KERNEL))
		goto bail;

	atomic_set(&mr->lkey_invalid, 0);
	mr->pd = pd;
	mr->max_segs = count;
	return 0;
bail:
	rvt_deinit_mregion(mr);
	return -ENOMEM;
}

/**
 * rvt_alloc_lkey - allocate an lkey
 * @mr: memory region that this lkey protects
 * @dma_region: 0->normal key, 1->restricted DMA key
 *
 * Returns 0 if successful, otherwise returns -errno.
 *
 * Increments mr reference count as required.
 *
 * Sets the lkey field of mr for non-dma regions.
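 *
 * The allocated lkey packs the table index into the top
 * dparms.lkey_table_size bits and a generation number above bit 8,
 * leaving the low 8 bits for the consumer; a value of zero is avoided
 * since lkey 0 is reserved for the unrestricted DMA MR.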
 *
 */
static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
{
	unsigned long flags;
	u32 r;
	u32 n;
	int ret = 0;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;

	rvt_get_mr(mr);
	spin_lock_irqsave(&rkt->lock, flags);

	/* special case for dma_mr lkey == 0 */
	if (dma_region) {
		struct rvt_mregion *tmr;

		tmr = rcu_access_pointer(dev->dma_mr);
		if (!tmr) {
			rcu_assign_pointer(dev->dma_mr, mr);
			mr->lkey_published = 1;
			rvt_get_mr(mr);
		}
		goto success;
	}

	/* Find the next available LKEY */
	r = rkt->next;
	n = r;
	for (;;) {
		if (!rcu_access_pointer(rkt->table[r]))
			break;
		r = (r + 1) & (rkt->max - 1);
		if (r == n)
			goto bail;
	}
	rkt->next = (r + 1) & (rkt->max - 1);
	/*
	 * Make sure lkey is never zero which is reserved to indicate an
	 * unrestricted LKEY.
	 */
	rkt->gen++;
	/*
	 * bits are capped to ensure enough bits for generation number
	 */
	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
		 << 8);
	if (mr->lkey == 0) {
		mr->lkey |= 1 << 8;
		rkt->gen++;
	}
	rcu_assign_pointer(rkt->table[r], mr);
	mr->lkey_published = 1;
success:
	spin_unlock_irqrestore(&rkt->lock, flags);
out:
	return ret;
bail:
	rvt_put_mr(mr);
	spin_unlock_irqrestore(&rkt->lock, flags);
	ret = -ENOMEM;
	goto out;
}

/**
 * rvt_free_lkey - free an lkey
 * @mr: mr to free from tables
 */
static void rvt_free_lkey(struct rvt_mregion *mr)
{
	unsigned long flags;
	u32 lkey = mr->lkey;
	u32 r;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	int freed = 0;

	spin_lock_irqsave(&rkt->lock, flags);
	if (!lkey) {
		if (mr->lkey_published) {
			RCU_INIT_POINTER(dev->dma_mr, NULL);
			rvt_put_mr(mr);
		}
	} else {
		if (!mr->lkey_published)
			goto out;
		r = lkey >> (32 - dev->dparms.lkey_table_size);
		RCU_INIT_POINTER(rkt->table[r], NULL);
	}
	mr->lkey_published = 0;
	freed++;
out:
	spin_unlock_irqrestore(&rkt->lock, flags);
	if (freed) {
		synchronize_rcu();
		percpu_ref_kill(&mr->refcount);
	}
}

static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
{
	struct rvt_mr *mr;
	int rval = -ENOMEM;
	int m;

	/* Allocate struct plus pointers to first level page tables. */
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
	if (!mr)
		goto bail;

	rval = rvt_init_mregion(&mr->mr, pd, count, 0);
	if (rval)
		goto bail;
	/*
	 * ib_reg_phys_mr() will initialize mr->ibmr except for
	 * lkey and rkey.
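	 * The lkey and rkey are then set to the same value just below,
	 * since rdmavt uses a single key table for both.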
	 */
	rval = rvt_alloc_lkey(&mr->mr, 0);
	if (rval)
		goto bail_mregion;
	mr->ibmr.lkey = mr->mr.lkey;
	mr->ibmr.rkey = mr->mr.lkey;
done:
	return mr;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	mr = ERR_PTR(rval);
	goto done;
}

static void __rvt_free_mr(struct rvt_mr *mr)
{
	rvt_free_lkey(&mr->mr);
	rvt_deinit_mregion(&mr->mr);
	kfree(mr);
}

/**
 * rvt_get_dma_mr - get a DMA memory region
 * @pd: protection domain for this memory region
 * @acc: access flags
 *
 * Return: the memory region on success, otherwise returns an errno.
 * Note that all DMA addresses should be created via the functions in
 * struct dma_virt_ops.
 */
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct rvt_mr *mr;
	struct ib_mr *ret;
	int rval;

	if (ibpd_to_rvtpd(pd)->user)
		return ERR_PTR(-EPERM);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail;
	}

	rval = rvt_alloc_lkey(&mr->mr, 1);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail_mregion;
	}

	mr->mr.access_flags = acc;
	ret = &mr->ibmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	goto done;
}

/**
 * rvt_reg_user_mr - register a userspace memory region
 * @pd: protection domain for this memory region
 * @start: starting userspace address
 * @length: length of region to register
 * @virt_addr: virtual address to use as the IOVA for this region
 * @mr_access_flags: access flags for this memory region
 * @udata: unused by the driver
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
			      u64 virt_addr, int mr_access_flags,
			      struct ib_udata *udata)
{
	struct rvt_mr *mr;
	struct ib_umem *umem;
	struct scatterlist *sg;
	int n, m, entry;
	struct ib_mr *ret;

	if (length == 0)
		return ERR_PTR(-EINVAL);

	umem = ib_umem_get(pd->uobject->context, start, length,
			   mr_access_flags, 0);
	if (IS_ERR(umem))
		return (void *)umem;

	n = umem->nmap;

	mr = __rvt_alloc_mr(n, pd);
	if (IS_ERR(mr)) {
		ret = (struct ib_mr *)mr;
		goto bail_umem;
	}

	mr->mr.user_base = start;
	mr->mr.iova = virt_addr;
	mr->mr.length = length;
	mr->mr.offset = ib_umem_offset(umem);
	mr->mr.access_flags = mr_access_flags;
	mr->umem = umem;

	if (is_power_of_2(umem->page_size))
		mr->mr.page_shift = ilog2(umem->page_size);
	m = 0;
	n = 0;
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		void *vaddr;

		vaddr = page_address(sg_page(sg));
		if (!vaddr) {
			ret = ERR_PTR(-EINVAL);
			goto bail_inval;
		}
		mr->mr.map[m]->segs[n].vaddr = vaddr;
		mr->mr.map[m]->segs[n].length = umem->page_size;
		trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr, umem->page_size);
		n++;
		if (n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	return &mr->ibmr;

bail_inval:
	__rvt_free_mr(mr);

bail_umem:
	ib_umem_release(umem);

	return ret;
}

/**
 * rvt_dereg_mr - unregister and free a memory region
 * @ibmr: the memory region to free
 *
 * Note that this is called to free MRs created by rvt_get_dma_mr()
 * or rvt_reg_user_mr().
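 *
 * Frees the lkey, drops its reference and waits (up to five seconds in
 * the code below) for remaining references to go away before releasing
 * the memory; returns -EBUSY if references remain after the timeout.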
 *
 * Returns 0 on success.
 */
int rvt_dereg_mr(struct ib_mr *ibmr)
{
	struct rvt_mr *mr = to_imr(ibmr);
	struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
	int ret = 0;
	unsigned long timeout;

	rvt_free_lkey(&mr->mr);

	rvt_put_mr(&mr->mr); /* will set completion if last */
	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
	if (!timeout) {
		rvt_pr_err(rdi,
			   "rvt_dereg_mr timeout mr %p pd %p\n",
			   mr, mr->mr.pd);
		rvt_get_mr(&mr->mr);
		ret = -EBUSY;
		goto out;
	}
	rvt_deinit_mregion(&mr->mr);
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);
out:
	return ret;
}

/**
 * rvt_alloc_mr - Allocate a memory region usable for fast registration
 * @pd: protection domain for this memory region
 * @mr_type: mem region type
 * @max_num_sg: Max number of segments allowed
 *
 * Return: the memory region on success, otherwise return an errno.
 */
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
			   enum ib_mr_type mr_type,
			   u32 max_num_sg)
{
	struct rvt_mr *mr;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = __rvt_alloc_mr(max_num_sg, pd);
	if (IS_ERR(mr))
		return (struct ib_mr *)mr;

	return &mr->ibmr;
}

/**
 * rvt_set_page - page assignment function called by ib_sg_to_pages
 * @ibmr: memory region
 * @addr: dma address of mapped page
 *
 * Return: 0 on success
 */
static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rvt_mr *mr = to_imr(ibmr);
	u32 ps = 1 << mr->mr.page_shift;
	u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
	int m, n;

	if (unlikely(mapped_segs == mr->mr.max_segs))
		return -ENOMEM;

	if (mr->mr.length == 0) {
		mr->mr.user_base = addr;
		mr->mr.iova = addr;
	}

	m = mapped_segs / RVT_SEGSZ;
	n = mapped_segs % RVT_SEGSZ;
	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
	mr->mr.map[m]->segs[n].length = ps;
	trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
	mr->mr.length += ps;

	return 0;
}

/**
 * rvt_map_mr_sg - map sg list and set it to the memory region
 * @ibmr: memory region
 * @sg: dma mapped scatterlist
 * @sg_nents: number of entries in sg
 * @sg_offset: offset in bytes into sg
 *
 * Return: number of sg elements mapped to the memory region
 */
int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
		  int sg_nents, unsigned int *sg_offset)
{
	struct rvt_mr *mr = to_imr(ibmr);

	mr->mr.length = 0;
	mr->mr.page_shift = PAGE_SHIFT;
	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
			      rvt_set_page);
}

/**
 * rvt_fast_reg_mr - fast register physical MR
 * @qp: the queue pair where the work request comes from
 * @ibmr: the memory region to be registered
 * @key: updated key for this memory region
 * @access: access flags for this memory region
 *
 * Returns 0 on success.
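 *
 * Only MRs from rvt_alloc_mr() can be fast registered: the DMA MR
 * (lkey 0) and user MRs are rejected below, and the new key may only
 * change the low 8 bits of the existing lkey.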
 */
int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
		    int access)
{
	struct rvt_mr *mr = to_imr(ibmr);

	if (qp->ibqp.pd != mr->mr.pd)
		return -EACCES;

	/* not applicable to dma MR or user MR */
	if (!mr->mr.lkey || mr->umem)
		return -EINVAL;

	if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
		return -EINVAL;

	ibmr->lkey = key;
	ibmr->rkey = key;
	mr->mr.lkey = key;
	mr->mr.access_flags = access;
	atomic_set(&mr->mr.lkey_invalid, 0);

	return 0;
}
EXPORT_SYMBOL(rvt_fast_reg_mr);

/**
 * rvt_invalidate_rkey - invalidate an MR rkey
 * @qp: queue pair associated with the invalidate op
 * @rkey: rkey to invalidate
 *
 * Returns 0 on success.
 */
int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
{
	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	struct rvt_mregion *mr;

	if (rkey == 0)
		return -EINVAL;

	rcu_read_lock();
	mr = rcu_dereference(
		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
		goto bail;

	atomic_set(&mr->lkey_invalid, 1);
	rcu_read_unlock();
	return 0;

bail:
	rcu_read_unlock();
	return -EINVAL;
}
EXPORT_SYMBOL(rvt_invalidate_rkey);

/**
 * rvt_alloc_fmr - allocate a fast memory region
 * @pd: the protection domain for this memory region
 * @mr_access_flags: access flags for this memory region
 * @fmr_attr: fast memory region attributes
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
			     struct ib_fmr_attr *fmr_attr)
{
	struct rvt_fmr *fmr;
	int m;
	struct ib_fmr *ret;
	int rval = -ENOMEM;

	/* Allocate struct plus pointers to first level page tables. */
	m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
	if (!fmr)
		goto bail;

	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages,
				PERCPU_REF_INIT_ATOMIC);
	if (rval)
		goto bail;

	/*
	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
	 * rkey.
	 */
	rval = rvt_alloc_lkey(&fmr->mr, 0);
	if (rval)
		goto bail_mregion;
	fmr->ibfmr.rkey = fmr->mr.lkey;
	fmr->ibfmr.lkey = fmr->mr.lkey;
	/*
	 * Resources are allocated but no valid mapping (RKEY can't be
	 * used).
	 */
	fmr->mr.access_flags = mr_access_flags;
	fmr->mr.max_segs = fmr_attr->max_pages;
	fmr->mr.page_shift = fmr_attr->page_shift;

	ret = &fmr->ibfmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&fmr->mr);
bail:
	kfree(fmr);
	ret = ERR_PTR(rval);
	goto done;
}

/**
 * rvt_map_phys_fmr - set up a fast memory region
 * @ibfmr: the fast memory region to set up
 * @page_list: the list of pages to associate with the fast memory region
 * @list_len: the number of pages to associate with the fast memory region
 * @iova: the virtual address of the start of the fast memory region
 *
 * This may be called from interrupt context.
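 *
 * Because it may run in interrupt context it does not sleep: the lkey
 * table lock is taken with interrupts disabled, and the call fails with
 * -EBUSY if the region still appears to be in use.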
 *
 * Return: 0 on success
 */

int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
		     int list_len, u64 iova)
{
	struct rvt_fmr *fmr = to_ifmr(ibfmr);
	struct rvt_lkey_table *rkt;
	unsigned long flags;
	int m, n;
	unsigned long i;
	u32 ps;
	struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);

	i = atomic_long_read(&fmr->mr.refcount.count);
	if (i > 2)
		return -EBUSY;

	if (list_len > fmr->mr.max_segs)
		return -EINVAL;

	rkt = &rdi->lkey_table;
	spin_lock_irqsave(&rkt->lock, flags);
	fmr->mr.user_base = iova;
	fmr->mr.iova = iova;
	ps = 1 << fmr->mr.page_shift;
	fmr->mr.length = list_len * ps;
	m = 0;
	n = 0;
	for (i = 0; i < list_len; i++) {
		fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
		fmr->mr.map[m]->segs[n].length = ps;
		trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
		if (++n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	spin_unlock_irqrestore(&rkt->lock, flags);
	return 0;
}

/**
 * rvt_unmap_fmr - unmap fast memory regions
 * @fmr_list: the list of fast memory regions to unmap
 *
 * Return: 0 on success.
 */
int rvt_unmap_fmr(struct list_head *fmr_list)
{
	struct rvt_fmr *fmr;
	struct rvt_lkey_table *rkt;
	unsigned long flags;
	struct rvt_dev_info *rdi;

	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
		rdi = ib_to_rvt(fmr->ibfmr.device);
		rkt = &rdi->lkey_table;
		spin_lock_irqsave(&rkt->lock, flags);
		fmr->mr.user_base = 0;
		fmr->mr.iova = 0;
		fmr->mr.length = 0;
		spin_unlock_irqrestore(&rkt->lock, flags);
	}
	return 0;
}

/**
 * rvt_dealloc_fmr - deallocate a fast memory region
 * @ibfmr: the fast memory region to deallocate
 *
 * Return: 0 on success.
 */
int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
{
	struct rvt_fmr *fmr = to_ifmr(ibfmr);
	int ret = 0;
	unsigned long timeout;

	rvt_free_lkey(&fmr->mr);
	rvt_put_mr(&fmr->mr); /* will set completion if last */
	timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
	if (!timeout) {
		rvt_get_mr(&fmr->mr);
		ret = -EBUSY;
		goto out;
	}
	rvt_deinit_mregion(&fmr->mr);
	kfree(fmr);
out:
	return ret;
}

/**
 * rvt_lkey_ok - check IB SGE for validity and initialize
 * @rkt: table containing lkey to check SGE against
 * @pd: protection domain
 * @isge: outgoing internal SGE
 * @sge: SGE to check
 * @acc: access flags
 *
 * Check the IB SGE for validity and initialize our internal version
 * of it.
 *
 * Return: 1 if valid and successful, otherwise returns 0.
 *
 * Increments the reference count upon success.
 */
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
		struct rvt_sge *isge, struct ib_sge *sge, int acc)
{
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;

	/*
	 * We use LKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr() and dma_virt_ops).
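	 * In that case the reserved DMA MR is referenced and sge->addr is
	 * used directly as the kernel virtual address.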
	 */
	rcu_read_lock();
	if (sge->lkey == 0) {
		struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);

		if (pd->user)
			goto bail;
		mr = rcu_dereference(dev->dma_mr);
		if (!mr)
			goto bail;
		rvt_get_mr(mr);
		rcu_read_unlock();

		isge->mr = mr;
		isge->vaddr = (void *)sge->addr;
		isge->length = sge->length;
		isge->sge_length = sge->length;
		isge->m = 0;
		isge->n = 0;
		goto ok;
	}
	mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
		goto bail;

	off = sge->addr - mr->user_base;
	if (unlikely(sge->addr < mr->user_base ||
		     off + sge->length > mr->length ||
		     (mr->access_flags & acc) != acc))
		goto bail;
	rvt_get_mr(mr);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * page sizes are uniform power of 2 so no loop is necessary.
		 * entries_spanned_by_off is the number of times the loop below
		 * would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	isge->mr = mr;
	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
	isge->length = mr->map[m]->segs[n].length - off;
	isge->sge_length = sge->length;
	isge->m = m;
	isge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_lkey_ok);

/**
 * rvt_rkey_ok - check the IB virtual address, length, and RKEY
 * @qp: qp for validation
 * @sge: SGE state
 * @len: length of data
 * @vaddr: virtual address to place data
 * @rkey: rkey to check
 * @acc: access flags
 *
 * Return: 1 if successful, otherwise 0.
 *
 * Increments the reference count upon success.
 */
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
		u32 len, u64 vaddr, u32 rkey, int acc)
{
	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;

	/*
	 * We use RKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr() and dma_virt_ops).
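	 * As with the LKEY case above, rkey 0 is only allowed for kernel
	 * PDs; a user PD takes the failure path below.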
	 */
	rcu_read_lock();
	if (rkey == 0) {
		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);

		if (pd->user)
			goto bail;
		mr = rcu_dereference(rdi->dma_mr);
		if (!mr)
			goto bail;
		rvt_get_mr(mr);
		rcu_read_unlock();

		sge->mr = mr;
		sge->vaddr = (void *)vaddr;
		sge->length = len;
		sge->sge_length = len;
		sge->m = 0;
		sge->n = 0;
		goto ok;
	}

	mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
		goto bail;

	off = vaddr - mr->iova;
	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
		     (mr->access_flags & acc) == 0))
		goto bail;
	rvt_get_mr(mr);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * page sizes are uniform power of 2 so no loop is necessary.
		 * entries_spanned_by_off is the number of times the loop below
		 * would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	sge->mr = mr;
	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
	sge->length = mr->map[m]->segs[n].length - off;
	sge->sge_length = len;
	sge->m = m;
	sge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_rkey_ok);
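
/*
 * Illustrative sketch only (not compiled; names other than the rdmavt
 * symbols above are hypothetical): a caller that validates a work
 * request SGE with rvt_lkey_ok() takes a reference on the region and is
 * expected to drop it with rvt_put_mr() when the SGE is no longer used:
 *
 *	struct rvt_sge isge;
 *
 *	if (!rvt_lkey_ok(&rdi->lkey_table, pd, &isge, &wr_sge, acc))
 *		return -EINVAL;
 *	... access isge.vaddr for up to isge.length bytes ...
 *	rvt_put_mr(isge.mr);
 */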