/*
 * Copyright(c) 2016 Intel Corporation.
 *
 * This file is provided under a dual BSD/GPLv2 license. When using or
 * redistributing this file, you may do so under either license.
 *
 * GPL LICENSE SUMMARY
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of version 2 of the GNU General Public License as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful, but
 * WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * General Public License for more details.
 *
 * BSD LICENSE
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 *
 *  - Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 *  - Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in
 *    the documentation and/or other materials provided with the
 *    distribution.
 *  - Neither the name of Intel Corporation nor the names of its
 *    contributors may be used to endorse or promote products derived
 *    from this software without specific prior written permission.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
 * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
 * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *
 */

#include <linux/slab.h>
#include <linux/vmalloc.h>
#include <rdma/ib_umem.h>
#include <rdma/rdma_vt.h>
#include "vt.h"
#include "mr.h"

/**
 * rvt_driver_mr_init - Init MR resources per driver
 * @rdi: rvt dev struct
 *
 * Do any initialization needed when a driver registers with rdmavt.
 *
 * Return: 0 on success or errno on failure
 */
int rvt_driver_mr_init(struct rvt_dev_info *rdi)
{
	unsigned int lkey_table_size = rdi->dparms.lkey_table_size;
	unsigned lk_tab_size;
	int i;

	/*
	 * The top hfi1_lkey_table_size bits are used to index the
	 * table. The lower 8 bits can be owned by the user (copied from
	 * the LKEY). The remaining bits act as a generation number or tag.
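	 *
	 * Illustrative layout (assuming dparms.lkey_table_size == 16):
	 *   bits 31..16  index into lkey_table.table
	 *   bits 15..8   generation number/tag
	 *   bits  7..0   available to the user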
	 */
	if (!lkey_table_size)
		return -EINVAL;

	spin_lock_init(&rdi->lkey_table.lock);

	/* ensure generation is at least 4 bits */
	if (lkey_table_size > RVT_MAX_LKEY_TABLE_BITS) {
		rvt_pr_warn(rdi, "lkey bits %u too large, reduced to %u\n",
			    lkey_table_size, RVT_MAX_LKEY_TABLE_BITS);
		rdi->dparms.lkey_table_size = RVT_MAX_LKEY_TABLE_BITS;
		lkey_table_size = rdi->dparms.lkey_table_size;
	}
	rdi->lkey_table.max = 1 << lkey_table_size;
	lk_tab_size = rdi->lkey_table.max * sizeof(*rdi->lkey_table.table);
	rdi->lkey_table.table = (struct rvt_mregion __rcu **)
			       vmalloc_node(lk_tab_size, rdi->dparms.node);
	if (!rdi->lkey_table.table)
		return -ENOMEM;

	RCU_INIT_POINTER(rdi->dma_mr, NULL);
	for (i = 0; i < rdi->lkey_table.max; i++)
		RCU_INIT_POINTER(rdi->lkey_table.table[i], NULL);

	return 0;
}

/**
 * rvt_mr_exit - clean up MR resources
 * @rdi: rvt dev structure
 *
 * Called when a driver has unregistered, or perhaps failed to register, with
 * rdmavt.
 */
void rvt_mr_exit(struct rvt_dev_info *rdi)
{
	if (rdi->dma_mr)
		rvt_pr_err(rdi, "DMA MR not null!\n");

	vfree(rdi->lkey_table.table);
}

static void rvt_deinit_mregion(struct rvt_mregion *mr)
{
	int i = mr->mapsz;

	mr->mapsz = 0;
	while (i)
		kfree(mr->map[--i]);
}

static int rvt_init_mregion(struct rvt_mregion *mr, struct ib_pd *pd,
			    int count)
{
	int m, i = 0;
	struct rvt_dev_info *dev = ib_to_rvt(pd->device);

	mr->mapsz = 0;
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	for (; i < m; i++) {
		mr->map[i] = kzalloc_node(sizeof(*mr->map[0]), GFP_KERNEL,
					  dev->dparms.node);
		if (!mr->map[i]) {
			rvt_deinit_mregion(mr);
			return -ENOMEM;
		}
		mr->mapsz++;
	}
	init_completion(&mr->comp);
	/* count returning the ptr to user */
	atomic_set(&mr->refcount, 1);
	atomic_set(&mr->lkey_invalid, 0);
	mr->pd = pd;
	mr->max_segs = count;
	return 0;
}

/**
 * rvt_alloc_lkey - allocate an lkey
 * @mr: memory region that this lkey protects
 * @dma_region: 0->normal key, 1->restricted DMA key
 *
 * Returns 0 if successful, otherwise returns -errno.
 *
 * Increments mr reference count as required.
 *
 * Sets the lkey field of mr for non-dma regions.
 *
 */
static int rvt_alloc_lkey(struct rvt_mregion *mr, int dma_region)
{
	unsigned long flags;
	u32 r;
	u32 n;
	int ret = 0;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;

	rvt_get_mr(mr);
	spin_lock_irqsave(&rkt->lock, flags);

	/* special case for dma_mr lkey == 0 */
	if (dma_region) {
		struct rvt_mregion *tmr;

		tmr = rcu_access_pointer(dev->dma_mr);
		if (!tmr) {
			rcu_assign_pointer(dev->dma_mr, mr);
			mr->lkey_published = 1;
		} else {
			rvt_put_mr(mr);
		}
		goto success;
	}

	/* Find the next available LKEY */
	r = rkt->next;
	n = r;
	for (;;) {
		if (!rcu_access_pointer(rkt->table[r]))
			break;
		r = (r + 1) & (rkt->max - 1);
		if (r == n)
			goto bail;
	}
	rkt->next = (r + 1) & (rkt->max - 1);
	/*
	 * Make sure lkey is never zero which is reserved to indicate an
	 * unrestricted LKEY.
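	 * A zero key can only come out of the computation below when r == 0
	 * and the masked generation bits are all zero; the check after the
	 * key is built bumps such a key to a non-zero value.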
	 */
	rkt->gen++;
	/*
	 * bits are capped to ensure enough bits for generation number
	 */
	mr->lkey = (r << (32 - dev->dparms.lkey_table_size)) |
		((((1 << (24 - dev->dparms.lkey_table_size)) - 1) & rkt->gen)
		 << 8);
	if (mr->lkey == 0) {
		mr->lkey |= 1 << 8;
		rkt->gen++;
	}
	rcu_assign_pointer(rkt->table[r], mr);
	mr->lkey_published = 1;
success:
	spin_unlock_irqrestore(&rkt->lock, flags);
out:
	return ret;
bail:
	rvt_put_mr(mr);
	spin_unlock_irqrestore(&rkt->lock, flags);
	ret = -ENOMEM;
	goto out;
}

/**
 * rvt_free_lkey - free an lkey
 * @mr: mr to free from tables
 */
static void rvt_free_lkey(struct rvt_mregion *mr)
{
	unsigned long flags;
	u32 lkey = mr->lkey;
	u32 r;
	struct rvt_dev_info *dev = ib_to_rvt(mr->pd->device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	int freed = 0;

	spin_lock_irqsave(&rkt->lock, flags);
	if (!mr->lkey_published)
		goto out;
	if (lkey == 0) {
		RCU_INIT_POINTER(dev->dma_mr, NULL);
	} else {
		r = lkey >> (32 - dev->dparms.lkey_table_size);
		RCU_INIT_POINTER(rkt->table[r], NULL);
	}
	mr->lkey_published = 0;
	freed++;
out:
	spin_unlock_irqrestore(&rkt->lock, flags);
	if (freed) {
		synchronize_rcu();
		rvt_put_mr(mr);
	}
}

static struct rvt_mr *__rvt_alloc_mr(int count, struct ib_pd *pd)
{
	struct rvt_mr *mr;
	int rval = -ENOMEM;
	int m;

	/* Allocate struct plus pointers to first level page tables. */
	m = (count + RVT_SEGSZ - 1) / RVT_SEGSZ;
	mr = kzalloc(sizeof(*mr) + m * sizeof(mr->mr.map[0]), GFP_KERNEL);
	if (!mr)
		goto bail;

	rval = rvt_init_mregion(&mr->mr, pd, count);
	if (rval)
		goto bail;
	/*
	 * ib_reg_phys_mr() will initialize mr->ibmr except for
	 * lkey and rkey.
	 */
	rval = rvt_alloc_lkey(&mr->mr, 0);
	if (rval)
		goto bail_mregion;
	mr->ibmr.lkey = mr->mr.lkey;
	mr->ibmr.rkey = mr->mr.lkey;
done:
	return mr;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	mr = ERR_PTR(rval);
	goto done;
}

static void __rvt_free_mr(struct rvt_mr *mr)
{
	rvt_deinit_mregion(&mr->mr);
	rvt_free_lkey(&mr->mr);
	kfree(mr);
}

/**
 * rvt_get_dma_mr - get a DMA memory region
 * @pd: protection domain for this memory region
 * @acc: access flags
 *
 * Return: the memory region on success, otherwise returns an errno.
 * Note that all DMA addresses should be created via the
 * struct ib_dma_mapping_ops functions (see dma.c).
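 *
 * Only kernel (non-user) protection domains may request this region; it is
 * published as the per-device dma_mr and is referenced with LKEY/RKEY 0.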
 */
struct ib_mr *rvt_get_dma_mr(struct ib_pd *pd, int acc)
{
	struct rvt_mr *mr;
	struct ib_mr *ret;
	int rval;

	if (ibpd_to_rvtpd(pd)->user)
		return ERR_PTR(-EPERM);

	mr = kzalloc(sizeof(*mr), GFP_KERNEL);
	if (!mr) {
		ret = ERR_PTR(-ENOMEM);
		goto bail;
	}

	rval = rvt_init_mregion(&mr->mr, pd, 0);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail;
	}

	rval = rvt_alloc_lkey(&mr->mr, 1);
	if (rval) {
		ret = ERR_PTR(rval);
		goto bail_mregion;
	}

	mr->mr.access_flags = acc;
	ret = &mr->ibmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&mr->mr);
bail:
	kfree(mr);
	goto done;
}

/**
 * rvt_reg_user_mr - register a userspace memory region
 * @pd: protection domain for this memory region
 * @start: starting userspace address
 * @length: length of region to register
 * @virt_addr: the virtual address (iova) to associate with this region
 * @mr_access_flags: access flags for this memory region
 * @udata: unused by the driver
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_mr *rvt_reg_user_mr(struct ib_pd *pd, u64 start, u64 length,
			      u64 virt_addr, int mr_access_flags,
			      struct ib_udata *udata)
{
	struct rvt_mr *mr;
	struct ib_umem *umem;
	struct scatterlist *sg;
	int n, m, entry;
	struct ib_mr *ret;

	if (length == 0)
		return ERR_PTR(-EINVAL);

	umem = ib_umem_get(pd->uobject->context, start, length,
			   mr_access_flags, 0);
	if (IS_ERR(umem))
		return (void *)umem;

	n = umem->nmap;

	mr = __rvt_alloc_mr(n, pd);
	if (IS_ERR(mr)) {
		ret = (struct ib_mr *)mr;
		goto bail_umem;
	}

	mr->mr.user_base = start;
	mr->mr.iova = virt_addr;
	mr->mr.length = length;
	mr->mr.offset = ib_umem_offset(umem);
	mr->mr.access_flags = mr_access_flags;
	mr->umem = umem;

	if (is_power_of_2(umem->page_size))
		mr->mr.page_shift = ilog2(umem->page_size);
	m = 0;
	n = 0;
	for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) {
		void *vaddr;

		vaddr = page_address(sg_page(sg));
		if (!vaddr) {
			ret = ERR_PTR(-EINVAL);
			goto bail_inval;
		}
		mr->mr.map[m]->segs[n].vaddr = vaddr;
		mr->mr.map[m]->segs[n].length = umem->page_size;
		n++;
		if (n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	return &mr->ibmr;

bail_inval:
	__rvt_free_mr(mr);

bail_umem:
	ib_umem_release(umem);

	return ret;
}

/**
 * rvt_dereg_mr - unregister and free a memory region
 * @ibmr: the memory region to free
 *
 * Note that this is called to free MRs created by rvt_get_dma_mr()
 * or rvt_reg_user_mr().
 *
 * Returns 0 on success.
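 *
 * Drops the caller's reference and waits (up to five seconds) for any
 * remaining references to be released; if they are not, the reference is
 * re-taken and -EBUSY is returned without freeing the MR.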
 */
int rvt_dereg_mr(struct ib_mr *ibmr)
{
	struct rvt_mr *mr = to_imr(ibmr);
	struct rvt_dev_info *rdi = ib_to_rvt(ibmr->pd->device);
	int ret = 0;
	unsigned long timeout;

	rvt_free_lkey(&mr->mr);

	rvt_put_mr(&mr->mr); /* will set completion if last */
	timeout = wait_for_completion_timeout(&mr->mr.comp, 5 * HZ);
	if (!timeout) {
		rvt_pr_err(rdi,
			   "rvt_dereg_mr timeout mr %p pd %p refcount %u\n",
			   mr, mr->mr.pd, atomic_read(&mr->mr.refcount));
		rvt_get_mr(&mr->mr);
		ret = -EBUSY;
		goto out;
	}
	rvt_deinit_mregion(&mr->mr);
	if (mr->umem)
		ib_umem_release(mr->umem);
	kfree(mr);
out:
	return ret;
}

/**
 * rvt_alloc_mr - Allocate a memory region usable with a fast register
 * work request
 * @pd: protection domain for this memory region
 * @mr_type: memory region type
 * @max_num_sg: Max number of segments allowed
 *
 * Return: the memory region on success, otherwise return an errno.
 */
struct ib_mr *rvt_alloc_mr(struct ib_pd *pd,
			   enum ib_mr_type mr_type,
			   u32 max_num_sg)
{
	struct rvt_mr *mr;

	if (mr_type != IB_MR_TYPE_MEM_REG)
		return ERR_PTR(-EINVAL);

	mr = __rvt_alloc_mr(max_num_sg, pd);
	if (IS_ERR(mr))
		return (struct ib_mr *)mr;

	return &mr->ibmr;
}

/**
 * rvt_set_page - page assignment function called by ib_sg_to_pages
 * @ibmr: memory region
 * @addr: dma address of mapped page
 *
 * Return: 0 on success
 */
static int rvt_set_page(struct ib_mr *ibmr, u64 addr)
{
	struct rvt_mr *mr = to_imr(ibmr);
	u32 ps = 1 << mr->mr.page_shift;
	u32 mapped_segs = mr->mr.length >> mr->mr.page_shift;
	int m, n;

	if (unlikely(mapped_segs == mr->mr.max_segs))
		return -ENOMEM;

	if (mr->mr.length == 0) {
		mr->mr.user_base = addr;
		mr->mr.iova = addr;
	}

	m = mapped_segs / RVT_SEGSZ;
	n = mapped_segs % RVT_SEGSZ;
	mr->mr.map[m]->segs[n].vaddr = (void *)addr;
	mr->mr.map[m]->segs[n].length = ps;
	mr->mr.length += ps;

	return 0;
}

/**
 * rvt_map_mr_sg - map an sg list onto the memory region
 * @ibmr: memory region
 * @sg: dma mapped scatterlist
 * @sg_nents: number of entries in sg
 * @sg_offset: offset in bytes into sg
 *
 * Return: number of sg elements mapped to the memory region
 */
int rvt_map_mr_sg(struct ib_mr *ibmr, struct scatterlist *sg,
		  int sg_nents, unsigned int *sg_offset)
{
	struct rvt_mr *mr = to_imr(ibmr);

	mr->mr.length = 0;
	mr->mr.page_shift = PAGE_SHIFT;
	return ib_sg_to_pages(ibmr, sg, sg_nents, sg_offset,
			      rvt_set_page);
}

/**
 * rvt_fast_reg_mr - fast register physical MR
 * @qp: the queue pair where the work request comes from
 * @ibmr: the memory region to be registered
 * @key: updated key for this memory region
 * @access: access flags for this memory region
 *
 * Returns 0 on success.
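 *
 * Only the low 8 bits of @key may differ from the region's current lkey,
 * and the operation is rejected for DMA MRs (lkey 0) and user MRs.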
 */
int rvt_fast_reg_mr(struct rvt_qp *qp, struct ib_mr *ibmr, u32 key,
		    int access)
{
	struct rvt_mr *mr = to_imr(ibmr);

	if (qp->ibqp.pd != mr->mr.pd)
		return -EACCES;

	/* not applicable to dma MR or user MR */
	if (!mr->mr.lkey || mr->umem)
		return -EINVAL;

	if ((key & 0xFFFFFF00) != (mr->mr.lkey & 0xFFFFFF00))
		return -EINVAL;

	ibmr->lkey = key;
	ibmr->rkey = key;
	mr->mr.lkey = key;
	mr->mr.access_flags = access;
	atomic_set(&mr->mr.lkey_invalid, 0);

	return 0;
}
EXPORT_SYMBOL(rvt_fast_reg_mr);

/**
 * rvt_invalidate_rkey - invalidate an MR rkey
 * @qp: queue pair associated with the invalidate op
 * @rkey: rkey to invalidate
 *
 * Returns 0 on success.
 */
int rvt_invalidate_rkey(struct rvt_qp *qp, u32 rkey)
{
	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	struct rvt_mregion *mr;

	if (rkey == 0)
		return -EINVAL;

	rcu_read_lock();
	mr = rcu_dereference(
		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || mr->lkey != rkey || qp->ibqp.pd != mr->pd))
		goto bail;

	atomic_set(&mr->lkey_invalid, 1);
	rcu_read_unlock();
	return 0;

bail:
	rcu_read_unlock();
	return -EINVAL;
}
EXPORT_SYMBOL(rvt_invalidate_rkey);

/**
 * rvt_alloc_fmr - allocate a fast memory region
 * @pd: the protection domain for this memory region
 * @mr_access_flags: access flags for this memory region
 * @fmr_attr: fast memory region attributes
 *
 * Return: the memory region on success, otherwise returns an errno.
 */
struct ib_fmr *rvt_alloc_fmr(struct ib_pd *pd, int mr_access_flags,
			     struct ib_fmr_attr *fmr_attr)
{
	struct rvt_fmr *fmr;
	int m;
	struct ib_fmr *ret;
	int rval = -ENOMEM;

	/* Allocate struct plus pointers to first level page tables. */
	m = (fmr_attr->max_pages + RVT_SEGSZ - 1) / RVT_SEGSZ;
	fmr = kzalloc(sizeof(*fmr) + m * sizeof(fmr->mr.map[0]), GFP_KERNEL);
	if (!fmr)
		goto bail;

	rval = rvt_init_mregion(&fmr->mr, pd, fmr_attr->max_pages);
	if (rval)
		goto bail;

	/*
	 * ib_alloc_fmr() will initialize fmr->ibfmr except for lkey &
	 * rkey.
	 */
	rval = rvt_alloc_lkey(&fmr->mr, 0);
	if (rval)
		goto bail_mregion;
	fmr->ibfmr.rkey = fmr->mr.lkey;
	fmr->ibfmr.lkey = fmr->mr.lkey;
	/*
	 * Resources are allocated but no valid mapping (RKEY can't be
	 * used).
	 */
	fmr->mr.access_flags = mr_access_flags;
	fmr->mr.max_segs = fmr_attr->max_pages;
	fmr->mr.page_shift = fmr_attr->page_shift;

	ret = &fmr->ibfmr;
done:
	return ret;

bail_mregion:
	rvt_deinit_mregion(&fmr->mr);
bail:
	kfree(fmr);
	ret = ERR_PTR(rval);
	goto done;
}

/**
 * rvt_map_phys_fmr - set up a fast memory region
 * @ibfmr: the fast memory region to set up
 * @page_list: the list of pages to associate with the fast memory region
 * @list_len: the number of pages to associate with the fast memory region
 * @iova: the virtual address of the start of the fast memory region
 *
 * This may be called from interrupt context.
 *
 * Return: 0 on success
 */
int rvt_map_phys_fmr(struct ib_fmr *ibfmr, u64 *page_list,
		     int list_len, u64 iova)
{
	struct rvt_fmr *fmr = to_ifmr(ibfmr);
	struct rvt_lkey_table *rkt;
	unsigned long flags;
	int m, n, i;
	u32 ps;
	struct rvt_dev_info *rdi = ib_to_rvt(ibfmr->device);

	i = atomic_read(&fmr->mr.refcount);
	if (i > 2)
		return -EBUSY;

	if (list_len > fmr->mr.max_segs)
		return -EINVAL;

	rkt = &rdi->lkey_table;
	spin_lock_irqsave(&rkt->lock, flags);
	fmr->mr.user_base = iova;
	fmr->mr.iova = iova;
	ps = 1 << fmr->mr.page_shift;
	fmr->mr.length = list_len * ps;
	m = 0;
	n = 0;
	for (i = 0; i < list_len; i++) {
		fmr->mr.map[m]->segs[n].vaddr = (void *)page_list[i];
		fmr->mr.map[m]->segs[n].length = ps;
		if (++n == RVT_SEGSZ) {
			m++;
			n = 0;
		}
	}
	spin_unlock_irqrestore(&rkt->lock, flags);
	return 0;
}

/**
 * rvt_unmap_fmr - unmap fast memory regions
 * @fmr_list: the list of fast memory regions to unmap
 *
 * Return: 0 on success.
 */
int rvt_unmap_fmr(struct list_head *fmr_list)
{
	struct rvt_fmr *fmr;
	struct rvt_lkey_table *rkt;
	unsigned long flags;
	struct rvt_dev_info *rdi;

	list_for_each_entry(fmr, fmr_list, ibfmr.list) {
		rdi = ib_to_rvt(fmr->ibfmr.device);
		rkt = &rdi->lkey_table;
		spin_lock_irqsave(&rkt->lock, flags);
		fmr->mr.user_base = 0;
		fmr->mr.iova = 0;
		fmr->mr.length = 0;
		spin_unlock_irqrestore(&rkt->lock, flags);
	}
	return 0;
}

/**
 * rvt_dealloc_fmr - deallocate a fast memory region
 * @ibfmr: the fast memory region to deallocate
 *
 * Return: 0 on success.
 */
int rvt_dealloc_fmr(struct ib_fmr *ibfmr)
{
	struct rvt_fmr *fmr = to_ifmr(ibfmr);
	int ret = 0;
	unsigned long timeout;

	rvt_free_lkey(&fmr->mr);
	rvt_put_mr(&fmr->mr); /* will set completion if last */
	timeout = wait_for_completion_timeout(&fmr->mr.comp, 5 * HZ);
	if (!timeout) {
		rvt_get_mr(&fmr->mr);
		ret = -EBUSY;
		goto out;
	}
	rvt_deinit_mregion(&fmr->mr);
	kfree(fmr);
out:
	return ret;
}

/**
 * rvt_lkey_ok - check IB SGE for validity and initialize
 * @rkt: table containing lkey to check SGE against
 * @pd: protection domain
 * @isge: outgoing internal SGE
 * @sge: SGE to check
 * @acc: access flags
 *
 * Check the IB SGE for validity and initialize our internal version
 * of it.
 *
 * Return: 1 if valid and successful, otherwise returns 0.
 *
 * increments the reference count upon success
 *
 */
int rvt_lkey_ok(struct rvt_lkey_table *rkt, struct rvt_pd *pd,
		struct rvt_sge *isge, struct ib_sge *sge, int acc)
{
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;
	struct rvt_dev_info *dev = ib_to_rvt(pd->ibpd.device);

	/*
	 * We use LKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr and dma.c).
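	 * Such SGEs bypass the lkey table and reference the device-wide
	 * dma_mr directly, which is why they are refused for user PDs.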
	 */
	rcu_read_lock();
	if (sge->lkey == 0) {
		if (pd->user)
			goto bail;
		mr = rcu_dereference(dev->dma_mr);
		if (!mr)
			goto bail;
		atomic_inc(&mr->refcount);
		rcu_read_unlock();

		isge->mr = mr;
		isge->vaddr = (void *)sge->addr;
		isge->length = sge->length;
		isge->sge_length = sge->length;
		isge->m = 0;
		isge->n = 0;
		goto ok;
	}
	mr = rcu_dereference(
		rkt->table[(sge->lkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
		     mr->lkey != sge->lkey || mr->pd != &pd->ibpd))
		goto bail;

	off = sge->addr - mr->user_base;
	if (unlikely(sge->addr < mr->user_base ||
		     off + sge->length > mr->length ||
		     (mr->access_flags & acc) != acc))
		goto bail;
	atomic_inc(&mr->refcount);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * page sizes are uniform power of 2 so no loop is necessary.
		 * entries_spanned_by_off is the number of times the loop below
		 * would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	isge->mr = mr;
	isge->vaddr = mr->map[m]->segs[n].vaddr + off;
	isge->length = mr->map[m]->segs[n].length - off;
	isge->sge_length = sge->length;
	isge->m = m;
	isge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_lkey_ok);

/**
 * rvt_rkey_ok - check the IB virtual address, length, and RKEY
 * @qp: qp for validation
 * @sge: SGE state
 * @len: length of data
 * @vaddr: virtual address to place data
 * @rkey: rkey to check
 * @acc: access flags
 *
 * Return: 1 if successful, otherwise 0.
 *
 * increments the reference count upon success
 */
int rvt_rkey_ok(struct rvt_qp *qp, struct rvt_sge *sge,
		u32 len, u64 vaddr, u32 rkey, int acc)
{
	struct rvt_dev_info *dev = ib_to_rvt(qp->ibqp.device);
	struct rvt_lkey_table *rkt = &dev->lkey_table;
	struct rvt_mregion *mr;
	unsigned n, m;
	size_t off;

	/*
	 * We use RKEY == zero for kernel virtual addresses
	 * (see rvt_get_dma_mr and dma.c).
	 */
	rcu_read_lock();
	if (rkey == 0) {
		struct rvt_pd *pd = ibpd_to_rvtpd(qp->ibqp.pd);
		struct rvt_dev_info *rdi = ib_to_rvt(pd->ibpd.device);

		if (pd->user)
			goto bail;
		mr = rcu_dereference(rdi->dma_mr);
		if (!mr)
			goto bail;
		atomic_inc(&mr->refcount);
		rcu_read_unlock();

		sge->mr = mr;
		sge->vaddr = (void *)vaddr;
		sge->length = len;
		sge->sge_length = len;
		sge->m = 0;
		sge->n = 0;
		goto ok;
	}

	mr = rcu_dereference(
		rkt->table[(rkey >> (32 - dev->dparms.lkey_table_size))]);
	if (unlikely(!mr || atomic_read(&mr->lkey_invalid) ||
		     mr->lkey != rkey || qp->ibqp.pd != mr->pd))
		goto bail;

	off = vaddr - mr->iova;
	if (unlikely(vaddr < mr->iova || off + len > mr->length ||
		     (mr->access_flags & acc) == 0))
		goto bail;
	atomic_inc(&mr->refcount);
	rcu_read_unlock();

	off += mr->offset;
	if (mr->page_shift) {
		/*
		 * page sizes are uniform power of 2 so no loop is necessary.
		 * entries_spanned_by_off is the number of times the loop below
		 * would have executed.
		 */
		size_t entries_spanned_by_off;

		entries_spanned_by_off = off >> mr->page_shift;
		off -= (entries_spanned_by_off << mr->page_shift);
		m = entries_spanned_by_off / RVT_SEGSZ;
		n = entries_spanned_by_off % RVT_SEGSZ;
	} else {
		m = 0;
		n = 0;
		while (off >= mr->map[m]->segs[n].length) {
			off -= mr->map[m]->segs[n].length;
			n++;
			if (n >= RVT_SEGSZ) {
				m++;
				n = 0;
			}
		}
	}
	sge->mr = mr;
	sge->vaddr = mr->map[m]->segs[n].vaddr + off;
	sge->length = mr->map[m]->segs[n].length - off;
	sge->sge_length = len;
	sge->m = m;
	sge->n = n;
ok:
	return 1;
bail:
	rcu_read_unlock();
	return 0;
}
EXPORT_SYMBOL(rvt_rkey_ok);