/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"
#include "trace.h"

/**
 * rvt_driver_mr_init - Init MR resources per driver
 * @rdi: rvt dev struct
 *
 * Do any initialization needed when a driver registers with rdmavt.
 *
 * Return: 0 on success or errno on failure
 */
int rvt_driver_mr_init(struct rvt_dev_info *rdi)
{
        unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
        unsigned lk_tab_size;
        int i;

        /*
         * The top hfi1_lkey_table_size bits are used to index the
         * table. The lower 8 bits can be owned by the user (copied from
         * the LKEY). The remaining bits act as a generation number or tag.
         */
        if (!lkey_table_size)
                return -EINVAL;

        spin_lock_init(&rdi->lkey_table.lock);

        /* ensure generation is at least 4 bits */
        if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
                rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
                            lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
                rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
                lkey_table_size = rdi->dparms.lkey_table_size;
        }
        rdi->lkey_table.max = 1 << lkey_table_size;
        rdi->lkey_table.shift = 32 - lkey_table_size;
        lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
        rdi->lkey_table.table = (struct rvt_mregion __rcu **)
                               vmalloc_node(lk_tab_size, rdi->dparms.node);
        if (!rdi->lkey_table.table)
                return -ENOMEM;

        RCU_INIT_POINTER(rdi->dma_mr, NULL);
        for (i = 0; i < rdi->lkey_table.max; i++)
                RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);

        return 0;
}
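
/*
 * Illustrative sizing sketch (not compiled), assuming a driver that
 * registers with dparms.lkey_table_size = 16:
 *
 *      rdi->lkey_table.max   = 1 << 16;        65536 lkey/rkey slots
 *      rdi->lkey_table.shift = 32 - 16;        table index = lkey >> 16
 *      lk_tab_size = 65536 * sizeof(void *);   512 KiB on a 64-bit build
 *
 * The table is vmalloc'ed, so a large lkey_table_size costs virtually
 * contiguous memory rather than physically contiguous pages.
 */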

/**
 * rvt_mr_exit - clean up MR
 * @rdi: rvt dev structure
 *
 * Called when drivers have unregistered or perhaps failed to register with us
 */
void rvt_mr_exit(struct rvt_dev_info *rdi)
{
        if (rdi->dma_mr)
                rvt_pr_err(rdi, "DMA MR not null!\n");

        vfree(rdi->lkey_table.table);
}

static void rvt_deinit_mregion(struct rvt_mregion *mr)
{
        int i = mr->mapsz;

        mr->mapsz = 0;
        while (i)
                kfree(mr->map[--i]);
        percpu_ref_exit(&mr->refcount);
}

static void __rvt_mregion_complete(struct percpu_ref *ref)
{
        struct rvt_mregion *mr = container_of(ref, struct rvt_mregion,
                                              refcount);

        complete(&mr->comp);
}

static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
                            int count, unsigned int percpu_flags)
{
        int m, i = 0;
        struct rvt_dev_info *dev = ib_to_rvt(pd->device);

        mr->mapsz = 0;
        m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
        for (; i < m; i++) {
                mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
                                          dev->dparms.node);
                if (!mr->map[i])
                        goto bail;
                mr->mapsz++;
        }
        init_completion(&mr->comp);
        /* count returning the ptr to user */
        if (percpu_ref_init(&mr->refcount, &__rvt_mregion_complete,
                            percpu_flags, GFP_KERNEL))
                goto bail;

        atomic_set(&mr->lkey_invalid, 0);
        mr->pd = pd;
        mr->max_segs = count;
        return 0;
bail:
        rvt_deinit_mregion(mr);
        return -ENOMEM;
}

/**
 * rvt_alloc_lkey - allocate an lkey
 * @mr: memory region that this lkey protects
 * @dma_region: 0->normal key, 1->restricted DMA key
 *
 * Returns 0 if successful, otherwise returns -errno.
 *
 * Increments mr reference count as required.
 *
 * Sets the lkey field of mr for non-dma regions.
 *
 */
static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
{
        unsigned long flags;
        u32 r;
        u32 n;
        int ret = 0;
        struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
        struct rvt_lkey_table *rkt = &dev->lkey_table;

        rvt_get_mr(mr);
        spin_lock_irqsave(&rkt->lock, flags);

        /* special case for dma_mr lkey == 0 */
        if (dma_region) {
                struct rvt_mregion *tmr;

                tmr = rcu_access_pointer(dev->dma_mr);
                if (!tmr) {
                        mr->lkey_published = 1;
                        /* Ensure published is written first */
                        rcu_assign_pointer(dev->dma_mr, mr);
                        rvt_get_mr(mr);
                }
                goto success;
        }

        /* Find the next available LKEY */
        r = rkt->next;
        n = r;
        for (;;) {
                if (!rcu_access_pointer(rkt->table[r]))
                        break;
                r = (r + 1) & (rkt->max - 1);
                if (r == n)
                        goto bail;
        }
        rkt->next = (r + 1) & (rkt->max - 1);
        /*
         * Make sure lkey is never zero which is reserved to indicate an
         * unrestricted LKEY.
         */
        rkt->gen++;
        /*
         * bits are capped to ensure enough bits for generation number
         */
        mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
                ((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
                 << 8);
        if (mr->lkey == 0) {
                mr->lkey |= 1 << 8;
                rkt->gen++;
        }
        mr->lkey_published = 1;
        /* Ensure published is written first */
        rcu_assign_pointer(rkt->table[r], mr);
success:
        spin_unlock_irqrestore(&rkt->lock, flags);
out:
        return ret;
bail:
        rvt_put_mr(mr);
        spin_unlock_irqrestore(&rkt->lock, flags);
        ret = -ENOMEM;
        goto out;
}
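
/*
 * Illustrative lkey layout (not compiled), again assuming
 * lkey_table_size = 16.  The lkey computed above then decomposes as:
 *
 *       31            16 15         8 7          0
 *      +----------------+------------+------------+
 *      | table index r  | generation | user bits  |
 *      +----------------+------------+------------+
 *
 *      mr->lkey = (r << 16) | ((rkt->gen & 0xff) << 8);
 *
 * The generation mask (1 << (24 - lkey_table_size)) - 1 keeps the index,
 * the generation and the 8 user-owned bits from overlapping; the cap in
 * rvt_driver_mr_init() (see the "at least 4 bits" comment there) keeps
 * some generation bits available.
 */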

/**
 * rvt_free_lkey - free an lkey
 * @mr: mr to free from tables
 */
static void rvt_free_lkey(struct rvt_mregion *mr)
{
        unsigned long flags;
        u32 lkey = mr->lkey;
        u32 r;
        struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
        struct rvt_lkey_table *rkt = &dev->lkey_table;
        int freed = 0;

        spin_lock_irqsave(&rkt->lock, flags);
        if (!lkey) {
                if (mr->lkey_published) {
                        mr->lkey_published = 0;
                        /* ensure published is written before pointer */
                        rcu_assign_pointer(dev->dma_mr, NULL);
                        rvt_put_mr(mr);
                }
        } else {
                if (!mr->lkey_published)
                        goto out;
                r = lkey >> (32 - dev->dparms.lkey_table_size);
                mr->lkey_published = 0;
                /* ensure published is written before pointer */
                rcu_assign_pointer(rkt->table[r], NULL);
        }
        freed++;
out:
        spin_unlock_irqrestore(&rkt->lock, flags);
        if (freed)
                percpu_ref_kill(&mr->refcount);
}

static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
{
        struct rvt_mr *mr;
        int rval = -ENOMEM;
        int m;

        /* Allocate struct plus pointers to first level page tables. */
        m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
        mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
        if (!mr)
                goto bail;

        rval = rvt_init_mregion(&mr->mr, pd, count, 0);
        if (rval)
                goto bail;
        /*
         * ib_reg_phys_mr() will initialize mr->ibmr except for
         * lkey and rkey.
         */
        rval = rvt_alloc_lkey(&mr->mr, 0);
        if (rval)
                goto bail_mregion;
        mr->ibmr.lkey = mr->mr.lkey;
        mr->ibmr.rkey = mr->mr.lkey;
done:
        return mr;

bail_mregion:
        rvt_deinit_mregion(&mr->mr);
bail:
        kfree(mr);
        mr = ERR_PTR(rval);
        goto done;
}

static void __rvt_free_mr(struct rvt_mr *mr)
{
        rvt_free_lkey(&mr->mr);
        rvt_deinit_mregion(&mr->mr);
        kfree(mr);
}

/**
 * rvt_get_dma_mr - get a DMA memory region
 * @pd: protection domain for this memory region
 * @acc: access flags
 *
 * Return: the memory region on success, otherwise returns an errno.
 * Note that all DMA addresses should be created via the functions in
 * struct dma_virt_ops.
 */
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
        struct rvt_mr *mr;
        struct ib_mr *ret;
        int rval;

        if (ibpd_to_rvtpd(pd)->user)
                return ERR_PTR(-EPERM);

        mr = kzalloc(sizeof(*mr), GFP_KERNEL);
        if (!mr) {
                ret = ERR_PTR(-ENOMEM);
                goto bail;
        }

        rval = rvt_init_mregion(&mr->mr, pd, 0, 0);
        if (rval) {
                ret = ERR_PTR(rval);
                goto bail;
        }

        rval = rvt_alloc_lkey(&mr->mr, 1);
        if (rval) {
                ret = ERR_PTR(rval);
                goto bail_mregion;
        }

        mr->mr.access_flags = acc;
        ret = &mr->ibmr;
done:
        return ret;

bail_mregion:
        rvt_deinit_mregion(&mr->mr);
bail:
        kfree(mr);
        goto done;
}

/**
 * rvt_reg_user_mr - register a userspace memory region
 * @pd: protection domain for this memory region
 * @start: starting userspace address
 * @length: length of region to register
 * @virt_addr: virtual address of the region (used as the MR's iova)
 * @mr_access_flags: access flags for this memory region
 * @udata: unused by the driver
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
                              u64 virt_addr, int mr_access_flags,
                              struct ib_udata *udata)
{
        struct rvt_mr *mr;
        struct ib_umem *umem;
        struct scatterlist *sg;
        int n, m, entry;
        struct ib_mr *ret;

        if (length == 0)
                return ERR_PTR(-EINVAL);

        umem = ib_umem_get(pd->uobject->context, start, length,
                           mr_access_flags, 0);
        if (IS_ERR(umem))
                return (void *)umem;

        n = umem->nmap;

        mr = __rvt_alloc_mr(n, pd);
        if (IS_ERR(mr)) {
                ret = (struct ib_mr *)mr;
                goto bail_umem;
        }

        mr->mr.user_base = start;
        mr->mr.iova = virt_addr;
        mr->mr.length = length;
        mr->mr.offset = ib_umem_offset(umem);
        mr->mr.access_flags = mr_access_flags;
        mr->umem = umem;

        mr->mr.page_shift = umem->page_shift;
        m = 0;
        n = 0;
        for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
                void *vaddr;

                vaddr = page_address(sg_page(sg));
                if (!vaddr) {
                        ret = ERR_PTR(-EINVAL);
                        goto bail_inval;
                }
                mr->mr.map[m]->segs[n].vaddr = vaddr;
                mr->mr.map[m]->segs[n].length = BIT(umem->page_shift);
                trace_rvt_mr_user_seg(&mr->mr, m, n, vaddr,
                                      BIT(umem->page_shift));
                n++;
                if (n == RVT_SEGSZ) {
                        m++;
                        n = 0;
                }
        }
        return &mr->ibmr;

bail_inval:
        __rvt_free_mr(mr);

bail_umem:
        ib_umem_release(umem);

        return ret;
}

/**
 * rvt_dereg_mr - unregister and free a memory region
 * @ibmr: the memory region to free
 *
 * Note that this is called to free MRs created by rvt_get_dma_mr()
 * or rvt_reg_user_mr().
 *
 * Returns 0 on success.
 */
int rvt_dereg_mr(struct ib_mr *ibmr)
{
        struct rvt_mr *mr = to_imr(ibmr);
        struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
        int ret = 0;
        unsigned long timeout;

        rvt_free_lkey(&mr->mr);

        rvt_put_mr(&mr->mr); /* will set completion if last */
        timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
        if (!timeout) {
                rvt_pr_err(rdi,
                           "rvt_dereg_mr timeout mr %p pd %p\n",
                           mr, mr->mr.pd);
                rvt_get_mr(&mr->mr);
                ret = -EBUSY;
                goto out;
        }
        rvt_deinit_mregion(&mr->mr);
        if (mr->umem)
                ib_umem_release(mr->umem);
        kfree(mr);
out:
        return ret;
}

/**
 * rvt_alloc_mr - Allocate a memory region usable for fast registration
 * @pd: protection domain for this memory region
 * @mr_type: mem region type
 * @max_num_sg: Max number of segments allowed
 *
 * Return: the memory region on success, otherwise return an errno.
 */
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
                           enum ib_mr_type mr_type,
                           u32 max_num_sg)
{
        struct rvt_mr *mr;

        if (mr_type != IB_MR_TYPE_MEM_REG)
                return ERR_PTR(-EINVAL);

        mr = __rvt_alloc_mr(max_num_sg, pd);
        if (IS_ERR(mr))
                return (struct ib_mr *)mr;

        return &mr->ibmr;
}

/**
 * rvt_set_page - page assignment function called by ib_sg_to_pages
 * @ibmr: memory region
 * @addr: dma address of mapped page
 *
 * Return: 0 on success
 */
static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct rvt_mr *mr = to_imr(ibmr);
        u32 ps = 1 << mr->mr.page_shift;
        u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
        int m, n;

        if (unlikely(mapped_segs == mr->mr.max_segs))
                return -ENOMEM;

        if (mr->mr.length == 0) {
                mr->mr.user_base = addr;
                mr->mr.iova = addr;
        }

        m = mapped_segs / RVT_SEGSZ;
        n = mapped_segs % RVT_SEGSZ;
        mr->mr.map[m]->segs[n].vaddr = (void *)addr;
        mr->mr.map[m]->segs[n].length = ps;
        trace_rvt_mr_page_seg(&mr->mr, m, n, (void *)addr, ps);
        mr->mr.length += ps;

        return 0;
}

/**
 * rvt_map_mr_sg - map an sg list to the memory region
 * @ibmr: memory region
 * @sg: dma mapped scatterlist
 * @sg_nents: number of entries in sg
 * @sg_offset: offset in bytes into sg
 *
 * Return: number of sg elements mapped to the memory region
 */
int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
                  int sg_nents, unsigned int *sg_offset)
{
        struct rvt_mr *mr = to_imr(ibmr);

        mr->mr.length = 0;
        mr->mr.page_shift = PAGE_SHIFT;
        return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
                              rvt_set_page);
}
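
/*
 * Illustrative consumer sketch (hypothetical, not compiled here): a
 * kernel ULP normally reaches rvt_alloc_mr()/rvt_map_mr_sg() through
 * the core verbs wrappers, roughly:
 *
 *      struct ib_mr *mr = ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, nents);
 *      int n;
 *
 *      if (IS_ERR(mr))
 *              return PTR_ERR(mr);
 *      n = ib_map_mr_sg(mr, sgl, nents, NULL, PAGE_SIZE);
 *
 * ib_map_mr_sg() walks the scatterlist and, via ib_sg_to_pages(),
 * calls back into rvt_set_page() for each page-sized chunk.
 */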

/**
 * rvt_fast_reg_mr - fast register physical MR
 * @qp: the queue pair where the work request comes from
 * @ibmr: the memory region to be registered
 * @key: updated key for this memory region
 * @access: access flags for this memory region
 *
 * Returns 0 on success.
 */
int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
                    int access)
{
        struct rvt_mr *mr = to_imr(ibmr);

        if (qp->ibqp.pd != mr->mr.pd)
                return -EACCES;

        /* not applicable to dma MR or user MR */
        if (!mr->mr.lkey || mr->umem)
                return -EINVAL;

        if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
                return -EINVAL;

        ibmr->lkey = key;
        ibmr->rkey = key;
        mr->mr.lkey = key;
        mr->mr.access_flags = access;
        atomic_set(&mr->mr.lkey_invalid, 0);

        return 0;
}
EXPORT_SYMBOL(rvt_fast_reg_mr);

/**
 * rvt_invalidate_rkey - invalidate an MR rkey
 * @qp: queue pair associated with the invalidate op
 * @rkey: rkey to invalidate
 *
 * Returns 0 on success.
 */
int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
{
        struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
        struct rvt_lkey_table *rkt = &dev->lkey_table;
        struct rvt_mregion *mr;

        if (rkey == 0)
                return -EINVAL;

        rcu_read_lock();
        mr = rcu_dereference(
                rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
        if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
                goto bail;

        atomic_set(&mr->lkey_invalid, 1);
        rcu_read_unlock();
        return 0;

bail:
        rcu_read_unlock();
        return -EINVAL;
}
EXPORT_SYMBOL(rvt_invalidate_rkey);
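
/*
 * Hypothetical driver-side sketch (not compiled here) of how a send
 * engine might dispatch the corresponding work requests to the two
 * exported helpers above.  The reg_wr()/ib_send_wr field layout is the
 * standard core one; the surrounding wqe handling is only assumed:
 *
 *      switch (wqe->wr.opcode) {
 *      case IB_WR_REG_MR:
 *              ret = rvt_fast_reg_mr(qp, reg_wr(&wqe->wr)->mr,
 *                                    reg_wr(&wqe->wr)->key,
 *                                    reg_wr(&wqe->wr)->access);
 *              break;
 *      case IB_WR_LOCAL_INV:
 *              ret = rvt_invalidate_rkey(qp, wqe->wr.ex.invalidate_rkey);
 *              break;
 *      }
 */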

/**
 * rvt_alloc_fmr - allocate a fast memory region
 * @pd: the protection domain for this memory region
 * @mr_access_flags: access flags for this memory region
 * @fmr_attr: fast memory region attributes
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
                             struct ib_fmr_attr *fmr_attr)
{
        struct rvt_fmr *fmr;
        int m;
        struct ib_fmr *ret;
        int rval = -ENOMEM;

        /* Allocate struct plus pointers to first level page tables. */
        m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
        fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
        if (!fmr)
                goto bail;

        rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages,
                                PERCPU_REF_INIT_ATOMIC);
        if (rval)
                goto bail;

        /*
         * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
         * rkey.
         */
        rval = rvt_alloc_lkey(&fmr->mr, 0);
        if (rval)
                goto bail_mregion;
        fmr->ibfmr.rkey = fmr->mr.lkey;
        fmr->ibfmr.lkey = fmr->mr.lkey;
        /*
         * Resources are allocated but no valid mapping (RKEY can't be
         * used).
         */
        fmr->mr.access_flags = mr_access_flags;
        fmr->mr.max_segs = fmr_attr->max_pages;
        fmr->mr.page_shift = fmr_attr->page_shift;

        ret = &fmr->ibfmr;
done:
        return ret;

bail_mregion:
        rvt_deinit_mregion(&fmr->mr);
bail:
        kfree(fmr);
        ret = ERR_PTR(rval);
        goto done;
}

/**
 * rvt_map_phys_fmr - set up a fast memory region
 * @ibfmr: the fast memory region to set up
 * @page_list: the list of pages to associate with the fast memory region
 * @list_len: the number of pages to associate with the fast memory region
 * @iova: the virtual address of the start of the fast memory region
 *
 * This may be called from interrupt context.
 *
 * Return: 0 on success
 */
int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
                     int list_len, u64 iova)
{
        struct rvt_fmr *fmr = to_ifmr(ibfmr);
        struct rvt_lkey_table *rkt;
        unsigned long flags;
        int m, n;
        unsigned long i;
        u32 ps;
        struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);

        i = atomic_long_read(&fmr->mr.refcount.count);
        if (i > 2)
                return -EBUSY;

        if (list_len > fmr->mr.max_segs)
                return -EINVAL;

        rkt = &rdi->lkey_table;
        spin_lock_irqsave(&rkt->lock, flags);
        fmr->mr.user_base = iova;
        fmr->mr.iova = iova;
        ps = 1 << fmr->mr.page_shift;
        fmr->mr.length = list_len * ps;
        m = 0;
        n = 0;
        for (i = 0; i < list_len; i++) {
                fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
                fmr->mr.map[m]->segs[n].length = ps;
                trace_rvt_mr_fmr_seg(&fmr->mr, m, n, (void *)page_list[i], ps);
                if (++n == RVT_SEGSZ) {
                        m++;
                        n = 0;
                }
        }
        spin_unlock_irqrestore(&rkt->lock, flags);
        return 0;
}

/**
 * rvt_unmap_fmr - unmap fast memory regions
 * @fmr_list: the list of fast memory regions to unmap
 *
 * Return: 0 on success.
 */
int rvt_unmap_fmr(struct list_head *fmr_list)
{
        struct rvt_fmr *fmr;
        struct rvt_lkey_table *rkt;
        unsigned long flags;
        struct rvt_dev_info *rdi;

        list_for_each_entry(fmr, fmr_list, ibfmr.list) {
                rdi = ib_to_rvt(fmr->ibfmr.device);
                rkt = &rdi->lkey_table;
                spin_lock_irqsave(&rkt->lock, flags);
                fmr->mr.user_base = 0;
                fmr->mr.iova = 0;
                fmr->mr.length = 0;
                spin_unlock_irqrestore(&rkt->lock, flags);
        }
        return 0;
}

/**
 * rvt_dealloc_fmr - deallocate a fast memory region
 * @ibfmr: the fast memory region to deallocate
 *
 * Return: 0 on success.
 */
int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
{
        struct rvt_fmr *fmr = to_ifmr(ibfmr);
        int ret = 0;
        unsigned long timeout;

        rvt_free_lkey(&fmr->mr);
        rvt_put_mr(&fmr->mr); /* will set completion if last */
        timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
        if (!timeout) {
                rvt_get_mr(&fmr->mr);
                ret = -EBUSY;
                goto out;
        }
        rvt_deinit_mregion(&fmr->mr);
        kfree(fmr);
out:
        return ret;
}

/**
 * rvt_lkey_ok - check IB SGE for validity and initialize
 * @rkt: table containing lkey to check SGE against
 * @pd: protection domain
 * @isge: outgoing internal SGE
 * @sge: SGE to check
 * @acc: access flags
 *
 * Check the IB SGE for validity and initialize our internal version
 * of it.
 *
 * Return: 1 if valid and successful, otherwise returns 0.
 *
 * increments the reference count upon success
 *
 */
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
                struct rvt_sge *isge, struct ib_sge *sge, int acc)
{
        struct rvt_mregion *mr;
        unsigned n, m;
        size_t off;

        /*
         * We use LKEY == zero for kernel virtual addresses
         * (see rvt_get_dma_mr() and dma_virt_ops).
         */
        rcu_read_lock();
        if (sge->lkey == 0) {
                struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);

                if (pd->user)
                        goto bail;
                mr = rcu_dereference(dev->dma_mr);
                if (!mr)
                        goto bail;
                rvt_get_mr(mr);
                rcu_read_unlock();

                isge->mr = mr;
                isge->vaddr = (void *)sge->addr;
                isge->length = sge->length;
                isge->sge_length = sge->length;
                isge->m = 0;
                isge->n = 0;
                goto ok;
        }
        mr = rcu_dereference(rkt->table[sge->lkey >> rkt->shift]);
        if (!mr)
                goto bail;
        rvt_get_mr(mr);
        if (!READ_ONCE(mr->lkey_published))
                goto bail_unref;

        if (unlikely(atomic_read(&mr->lkey_invalid) ||
                     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
                goto bail_unref;

        off = sge->addr - mr->user_base;
        if (unlikely(sge->addr < mr->user_base ||
                     off + sge->length > mr->length ||
                     (mr->access_flags & acc) != acc))
                goto bail_unref;
        rcu_read_unlock();

        off += mr->offset;
        if (mr->page_shift) {
                /*
                 * page sizes are uniform power of 2 so no loop is necessary;
                 * entries_spanned_by_off is the number of times the loop below
                 * would have executed.
                 */
                size_t entries_spanned_by_off;

                entries_spanned_by_off = off >> mr->page_shift;
                off -= (entries_spanned_by_off << mr->page_shift);
                m = entries_spanned_by_off / RVT_SEGSZ;
                n = entries_spanned_by_off % RVT_SEGSZ;
        } else {
                m = 0;
                n = 0;
                while (off >= mr->map[m]->segs[n].length) {
                        off -= mr->map[m]->segs[n].length;
                        n++;
                        if (n >= RVT_SEGSZ) {
                                m++;
                                n = 0;
                        }
                }
        }
        isge->mr = mr;
        isge->vaddr = mr->map[m]->segs[n].vaddr + off;
        isge->length = mr->map[m]->segs[n].length - off;
        isge->sge_length = sge->length;
        isge->m = m;
        isge->n = n;
ok:
        return 1;
bail_unref:
        rvt_put_mr(mr);
bail:
        rcu_read_unlock();
        return 0;
}
EXPORT_SYMBOL(rvt_lkey_ok);
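
/*
 * Worked example of the offset math above (illustrative numbers only):
 * with mr->page_shift = 12 (4 KiB pages) and off = 0x5000 after adding
 * mr->offset:
 *
 *      entries_spanned_by_off = 0x5000 >> 12 = 5
 *      off -= 5 << 12;                 off is now 0
 *      m = 5 / RVT_SEGSZ;              first-level map block
 *      n = 5 % RVT_SEGSZ;              segment within that block
 *
 * The non-uniform (page_shift == 0) branch arrives at the same
 * (m, n, off) by walking the variable-length segments one at a time.
 */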

/**
 * rvt_rkey_ok - check the IB virtual address, length, and RKEY
 * @qp: qp for validation
 * @sge: SGE state
 * @len: length of data
 * @vaddr: virtual address to place data
 * @rkey: rkey to check
 * @acc: access flags
 *
 * Return: 1 if successful, otherwise 0.
 *
 * increments the reference count upon success
 */
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
                u32 len, u64 vaddr, u32 rkey, int acc)
{
        struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
        struct rvt_lkey_table *rkt = &dev->lkey_table;
        struct rvt_mregion *mr;
        unsigned n, m;
        size_t off;

        /*
         * We use RKEY == zero for kernel virtual addresses
         * (see rvt_get_dma_mr() and dma_virt_ops).
         */
        rcu_read_lock();
        if (rkey == 0) {
                struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
                struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);

                if (pd->user)
                        goto bail;
                mr = rcu_dereference(rdi->dma_mr);
                if (!mr)
                        goto bail;
                rvt_get_mr(mr);
                rcu_read_unlock();

                sge->mr = mr;
                sge->vaddr = (void *)vaddr;
                sge->length = len;
                sge->sge_length = len;
                sge->m = 0;
                sge->n = 0;
                goto ok;
        }

        mr = rcu_dereference(rkt->table[rkey >> rkt->shift]);
        if (!mr)
                goto bail;
        rvt_get_mr(mr);
        /* ensure mr read is before test */
        if (!READ_ONCE(mr->lkey_published))
                goto bail_unref;
        if (unlikely(atomic_read(&mr->lkey_invalid) ||
                     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
                goto bail_unref;

        off = vaddr - mr->iova;
        if (unlikely(vaddr < mr->iova || off + len > mr->length ||
                     (mr->access_flags & acc) == 0))
                goto bail_unref;
        rcu_read_unlock();

        off += mr->offset;
        if (mr->page_shift) {
                /*
                 * page sizes are uniform power of 2 so no loop is necessary;
                 * entries_spanned_by_off is the number of times the loop below
                 * would have executed.
                 */
                size_t entries_spanned_by_off;

                entries_spanned_by_off = off >> mr->page_shift;
                off -= (entries_spanned_by_off << mr->page_shift);
                m = entries_spanned_by_off / RVT_SEGSZ;
                n = entries_spanned_by_off % RVT_SEGSZ;
        } else {
                m = 0;
                n = 0;
                while (off >= mr->map[m]->segs[n].length) {
                        off -= mr->map[m]->segs[n].length;
                        n++;
                        if (n >= RVT_SEGSZ) {
                                m++;
                                n = 0;
                        }
                }
        }
        sge->mr = mr;
        sge->vaddr = mr->map[m]->segs[n].vaddr + off;
        sge->length = mr->map[m]->segs[n].length - off;
        sge->sge_length = len;
        sge->m = m;
        sge->n = n;
ok:
        return 1;
bail_unref:
        rvt_put_mr(mr);
bail:
        rcu_read_unlock();
        return 0;
}
EXPORT_SYMBOL(rvt_rkey_ok);