// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different than the last_key. Set last_key to -1
 * if this is the first key for an MR or MW
 */
u8 rxe_get_next_key(u32 last_key)
{
        u8 key;

        do {
                get_random_bytes(&key, 1);
        } while (key == last_key);

        return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
        struct rxe_map_set *set = mr->cur_map_set;

        switch (mr->type) {
        case IB_MR_TYPE_DMA:
                return 0;

        case IB_MR_TYPE_USER:
        case IB_MR_TYPE_MEM_REG:
                if (iova < set->iova || length > set->length ||
                    iova > set->iova + set->length - length)
                        return -EFAULT;
                return 0;

        default:
                pr_warn("%s: mr type (%d) not supported\n",
                        __func__, mr->type);
                return -EFAULT;
        }
}

#define IB_ACCESS_REMOTE        (IB_ACCESS_REMOTE_READ          \
                                 | IB_ACCESS_REMOTE_WRITE       \
                                 | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
        u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
        u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

        /* Set ibmr->l/rkey and also copy into the private l/rkey.
         * For user MRs these will always be the same; for cases where
         * the caller 'owns' the key portion they may be different
         * until the REG_MR WQE is executed.
         */
        mr->lkey = mr->ibmr.lkey = lkey;
        mr->rkey = mr->ibmr.rkey = rkey;

        mr->state = RXE_MR_STATE_INVALID;
        mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}

static void rxe_mr_free_map_set(int num_map, struct rxe_map_set *set)
{
        int i;

        for (i = 0; i < num_map; i++)
                kfree(set->map[i]);

        kfree(set->map);
        kfree(set);
}

static int rxe_mr_alloc_map_set(int num_map, struct rxe_map_set **setp)
{
        int i;
        struct rxe_map_set *set;

        set = kmalloc(sizeof(*set), GFP_KERNEL);
        if (!set)
                goto err_out;

        set->map = kmalloc_array(num_map, sizeof(struct rxe_map *), GFP_KERNEL);
        if (!set->map)
                goto err_free_set;

        for (i = 0; i < num_map; i++) {
                set->map[i] = kmalloc(sizeof(struct rxe_map), GFP_KERNEL);
                if (!set->map[i])
                        goto err_free_map;
        }

        *setp = set;

        return 0;

err_free_map:
        for (i--; i >= 0; i--)
                kfree(set->map[i]);

        kfree(set->map);
err_free_set:
        kfree(set);
err_out:
        return -ENOMEM;
}
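
/* Illustration (constants assumed for the example, not taken from this
 * file): a map set is a two-level table.  set->map[] holds num_map
 * pointers, each to a struct rxe_map carrying RXE_BUF_PER_MAP
 * struct rxe_phys_buf entries.  Assuming RXE_BUF_PER_MAP == 256,
 * registering 1000 pages in rxe_mr_alloc() below gives
 *
 *      num_map = (1000 + 256 - 1) / 256 = 4
 *      max_buf = 4 * 256 = 1024
 *
 * i.e. four maps with 24 spare buffer slots.
 */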

/**
 * rxe_mr_alloc() - Allocate memory map array(s) for MR
 * @mr: Memory region
 * @num_buf: Number of buffer descriptors to support
 * @both: If non-zero allocate both the current and next map set,
 *        else just allocate the current one. Used for fast MRs
 *
 * Return: 0 on success else an error
 */
static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf, int both)
{
        int ret;
        int num_map;

        BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));
        num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

        mr->map_shift = ilog2(RXE_BUF_PER_MAP);
        mr->map_mask = RXE_BUF_PER_MAP - 1;
        mr->num_buf = num_buf;
        mr->max_buf = num_map * RXE_BUF_PER_MAP;
        mr->num_map = num_map;

        ret = rxe_mr_alloc_map_set(num_map, &mr->cur_map_set);
        if (ret)
                return -ENOMEM;

        if (both) {
                ret = rxe_mr_alloc_map_set(num_map, &mr->next_map_set);
                if (ret)
                        goto err_free;
        }

        return 0;

err_free:
        rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);
        mr->cur_map_set = NULL;
        return -ENOMEM;
}

void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
        rxe_mr_init(access, mr);

        mr->ibmr.pd = &pd->ibpd;
        mr->access = access;
        mr->state = RXE_MR_STATE_VALID;
        mr->type = IB_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
                     int access, struct rxe_mr *mr)
{
        struct rxe_map_set *set;
        struct rxe_map **map;
        struct rxe_phys_buf *buf = NULL;
        struct ib_umem *umem;
        struct sg_page_iter sg_iter;
        int num_buf;
        void *vaddr;
        int err;

        umem = ib_umem_get(pd->ibpd.device, start, length, access);
        if (IS_ERR(umem)) {
                pr_warn("%s: Unable to pin memory region err = %d\n",
                        __func__, (int)PTR_ERR(umem));
                err = PTR_ERR(umem);
                goto err_out;
        }

        num_buf = ib_umem_num_pages(umem);

        rxe_mr_init(access, mr);

        err = rxe_mr_alloc(mr, num_buf, 0);
        if (err) {
                pr_warn("%s: Unable to allocate memory for map\n",
                        __func__);
                goto err_release_umem;
        }

        set = mr->cur_map_set;
        set->page_shift = PAGE_SHIFT;
        set->page_mask = PAGE_SIZE - 1;

        num_buf = 0;
        map = set->map;

        if (length > 0) {
                buf = map[0]->buf;

                for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
                        if (num_buf >= RXE_BUF_PER_MAP) {
                                map++;
                                buf = map[0]->buf;
                                num_buf = 0;
                        }

                        vaddr = page_address(sg_page_iter_page(&sg_iter));
                        if (!vaddr) {
                                pr_warn("%s: Unable to get virtual address\n",
                                        __func__);
                                err = -ENOMEM;
                                goto err_release_umem;
                        }

                        buf->addr = (uintptr_t)vaddr;
                        buf->size = PAGE_SIZE;
                        num_buf++;
                        buf++;
                }
        }

        mr->ibmr.pd = &pd->ibpd;
        mr->umem = umem;
        mr->access = access;
        mr->state = RXE_MR_STATE_VALID;
        mr->type = IB_MR_TYPE_USER;

        set->length = length;
        set->iova = iova;
        set->va = start;
        set->offset = ib_umem_offset(umem);

        return 0;

err_release_umem:
        ib_umem_release(umem);
err_out:
        return err;
}

int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
        int err;

        /* always allow remote access for FMRs */
        rxe_mr_init(IB_ACCESS_REMOTE, mr);

        err = rxe_mr_alloc(mr, max_pages, 1);
        if (err)
                goto err1;

        mr->ibmr.pd = &pd->ibpd;
        mr->max_buf = max_pages;
        mr->state = RXE_MR_STATE_FREE;
        mr->type = IB_MR_TYPE_MEM_REG;

        return 0;

err1:
        return err;
}
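
/* Worked example for the indexing in lookup_iova() below, assuming 4K
 * pages (page_shift == 12, page_mask == 0xfff) and
 * RXE_BUF_PER_MAP == 256 (map_mask == 0xff, map_shift == 8):
 *
 *      offset = iova - set->iova + set->offset;
 *      *offset_out = offset & 0xfff;           (byte within the buffer)
 *      *n_out = (offset >> 12) & 0xff;         (buffer index in the map)
 *      *m_out = (offset >> 12) >> 8;           (map index in set->map[])
 *
 * When page_shift is zero the buffer sizes may vary, so the buffers are
 * walked linearly instead.
 */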

static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
                        size_t *offset_out)
{
        struct rxe_map_set *set = mr->cur_map_set;
        size_t offset = iova - set->iova + set->offset;
        int map_index;
        int buf_index;
        u64 length;
        struct rxe_map *map;

        if (likely(set->page_shift)) {
                *offset_out = offset & set->page_mask;
                offset >>= set->page_shift;
                *n_out = offset & mr->map_mask;
                *m_out = offset >> mr->map_shift;
        } else {
                map_index = 0;
                buf_index = 0;

                map = set->map[map_index];
                length = map->buf[buf_index].size;

                while (offset >= length) {
                        offset -= length;
                        buf_index++;

                        if (buf_index == RXE_BUF_PER_MAP) {
                                map_index++;
                                buf_index = 0;
                        }
                        map = set->map[map_index];
                        length = map->buf[buf_index].size;
                }

                *m_out = map_index;
                *n_out = buf_index;
                *offset_out = offset;
        }
}

void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
        size_t offset;
        int m, n;
        void *addr;

        if (mr->state != RXE_MR_STATE_VALID) {
                pr_warn("mr not in valid state\n");
                addr = NULL;
                goto out;
        }

        if (!mr->cur_map_set) {
                addr = (void *)(uintptr_t)iova;
                goto out;
        }

        if (mr_check_range(mr, iova, length)) {
                pr_warn("range violation\n");
                addr = NULL;
                goto out;
        }

        lookup_iova(mr, iova, &m, &n, &offset);

        if (offset + length > mr->cur_map_set->map[m]->buf[n].size) {
                pr_warn("crosses page boundary\n");
                addr = NULL;
                goto out;
        }

        addr = (void *)(uintptr_t)mr->cur_map_set->map[m]->buf[n].addr + offset;

out:
        return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mr object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
                enum rxe_mr_copy_dir dir)
{
        int err;
        int bytes;
        u8 *va;
        struct rxe_map **map;
        struct rxe_phys_buf *buf;
        int m;
        int i;
        size_t offset;

        if (length == 0)
                return 0;

        if (mr->type == IB_MR_TYPE_DMA) {
                u8 *src, *dest;

                src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

                dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

                memcpy(dest, src, length);

                return 0;
        }

        WARN_ON_ONCE(!mr->cur_map_set);

        err = mr_check_range(mr, iova, length);
        if (err) {
                err = -EFAULT;
                goto err1;
        }

        lookup_iova(mr, iova, &m, &i, &offset);

        map = mr->cur_map_set->map + m;
        buf = map[0]->buf + i;

        while (length > 0) {
                u8 *src, *dest;

                va = (u8 *)(uintptr_t)buf->addr + offset;
                src = (dir == RXE_TO_MR_OBJ) ? addr : va;
                dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

                bytes = buf->size - offset;

                if (bytes > length)
                        bytes = length;

                memcpy(dest, src, bytes);

                length -= bytes;
                addr += bytes;

                offset = 0;
                buf++;
                i++;

                if (i == RXE_BUF_PER_MAP) {
                        i = 0;
                        map++;
                        buf = map[0]->buf;
                }
        }

        return 0;

err1:
        return err;
}
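
/* Sketch of the DMA descriptor state consumed by copy_data() below;
 * field meanings inferred from the code, shown here for illustration:
 *
 *      dma->cur_sge    index of the SGE currently being consumed
 *      dma->sge_offset bytes already consumed within that SGE
 *      dma->resid      bytes remaining in the overall transfer
 *
 * e.g. copying 100 bytes when the current SGE has 40 bytes left copies
 * those 40 bytes, drops the MR reference, advances to the next SGE
 * (looking up its MR by lkey) and then copies the remaining 60 bytes.
 */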

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
        struct rxe_pd *pd,
        int access,
        struct rxe_dma_info *dma,
        void *addr,
        int length,
        enum rxe_mr_copy_dir dir)
{
        int bytes;
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;
        struct rxe_mr *mr = NULL;
        u64 iova;
        int err;

        if (length == 0)
                return 0;

        if (length > resid) {
                err = -EINVAL;
                goto err2;
        }

        if (sge->length && (offset < sge->length)) {
                mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
                if (!mr) {
                        err = -EINVAL;
                        goto err1;
                }
        }

        while (length > 0) {
                bytes = length;

                if (offset >= sge->length) {
                        if (mr) {
                                rxe_put(mr);
                                mr = NULL;
                        }
                        sge++;
                        dma->cur_sge++;
                        offset = 0;

                        if (dma->cur_sge >= dma->num_sge) {
                                err = -ENOSPC;
                                goto err2;
                        }

                        if (sge->length) {
                                mr = lookup_mr(pd, access, sge->lkey,
                                               RXE_LOOKUP_LOCAL);
                                if (!mr) {
                                        err = -EINVAL;
                                        goto err1;
                                }
                        } else {
                                continue;
                        }
                }

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                if (bytes > 0) {
                        iova = sge->addr + offset;

                        err = rxe_mr_copy(mr, iova, addr, bytes, dir);
                        if (err)
                                goto err2;

                        offset += bytes;
                        resid -= bytes;
                        length -= bytes;
                        addr += bytes;
                }
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        if (mr)
                rxe_put(mr);

        return 0;

err2:
        if (mr)
                rxe_put(mr);
err1:
        return err;
}

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
        struct rxe_sge *sge = &dma->sge[dma->cur_sge];
        int offset = dma->sge_offset;
        int resid = dma->resid;

        while (length) {
                unsigned int bytes;

                if (offset >= sge->length) {
                        sge++;
                        dma->cur_sge++;
                        offset = 0;
                        if (dma->cur_sge >= dma->num_sge)
                                return -ENOSPC;
                }

                bytes = length;

                if (bytes > sge->length - offset)
                        bytes = sge->length - offset;

                offset += bytes;
                resid -= bytes;
                length -= bytes;
        }

        dma->sge_offset = offset;
        dma->resid = resid;

        return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
                         enum rxe_mr_lookup_type type)
{
        struct rxe_mr *mr;
        struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
        int index = key >> 8;

        mr = rxe_pool_get_index(&rxe->mr_pool, index);
        if (!mr)
                return NULL;

        if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
                     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
                     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
                     mr->state != RXE_MR_STATE_VALID)) {
                rxe_put(mr);
                mr = NULL;
        }

        return mr;
}
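
/* Key layout used by lookup_mr() above and rxe_invalidate_mr() below
 * (see rxe_mr_init()):
 *
 *      lkey/rkey = (pool index << 8) | 8-bit key
 *
 * e.g. (illustrative values) an MR at pool index 0x1234 with key byte
 * 0xab has lkey == 0x1234ab; key >> 8 recovers the pool index, and the
 * full key, PD, access rights and MR state are then checked.
 */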

int rxe_invalidate_mr(struct rxe_qp *qp, u32 rkey)
{
        struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
        struct rxe_mr *mr;
        int ret;

        mr = rxe_pool_get_index(&rxe->mr_pool, rkey >> 8);
        if (!mr) {
                pr_err("%s: No MR for rkey %#x\n", __func__, rkey);
                ret = -EINVAL;
                goto err;
        }

        if (rkey != mr->rkey) {
                pr_err("%s: rkey (%#x) doesn't match mr->rkey (%#x)\n",
                        __func__, rkey, mr->rkey);
                ret = -EINVAL;
                goto err_drop_ref;
        }

        if (atomic_read(&mr->num_mw) > 0) {
                pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
                        __func__);
                ret = -EINVAL;
                goto err_drop_ref;
        }

        if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
                pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
                ret = -EINVAL;
                goto err_drop_ref;
        }

        mr->state = RXE_MR_STATE_FREE;
        ret = 0;

err_drop_ref:
        rxe_put(mr);
err:
        return ret;
}

/* user can (re)register fast MR by executing a REG_MR WQE.
 * user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to maintain
 * the ib mr keys in sync with rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
        struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
        u32 key = wqe->wr.wr.reg.key & 0xff;
        u32 access = wqe->wr.wr.reg.access;
        struct rxe_map_set *set;

        /* user can only register MR in free state */
        if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
                pr_warn("%s: mr->lkey = 0x%x not free\n",
                        __func__, mr->lkey);
                return -EINVAL;
        }

        /* user can only register mr with qp in same protection domain */
        if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
                pr_warn("%s: qp->pd and mr->pd don't match\n",
                        __func__);
                return -EINVAL;
        }

        mr->access = access;
        mr->lkey = (mr->lkey & ~0xff) | key;
        mr->rkey = (access & IB_ACCESS_REMOTE) ? mr->lkey : 0;
        mr->state = RXE_MR_STATE_VALID;

        set = mr->cur_map_set;
        mr->cur_map_set = mr->next_map_set;
        mr->cur_map_set->iova = wqe->wr.wr.reg.mr->iova;
        mr->next_map_set = set;

        return 0;
}

int rxe_mr_set_page(struct ib_mr *ibmr, u64 addr)
{
        struct rxe_mr *mr = to_rmr(ibmr);
        struct rxe_map_set *set = mr->next_map_set;
        struct rxe_map *map;
        struct rxe_phys_buf *buf;

        if (unlikely(set->nbuf == mr->num_buf))
                return -ENOMEM;

        map = set->map[set->nbuf / RXE_BUF_PER_MAP];
        buf = &map->buf[set->nbuf % RXE_BUF_PER_MAP];

        buf->addr = addr;
        buf->size = ibmr->page_size;
        set->nbuf++;

        return 0;
}

int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
        struct rxe_mr *mr = to_rmr(ibmr);

        /* See IBA 10.6.7.2.6 */
        if (atomic_read(&mr->num_mw) > 0)
                return -EINVAL;

        rxe_put(mr);

        return 0;
}

void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
        struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);

        rxe_put(mr_pd(mr));

        ib_umem_release(mr->umem);

        if (mr->cur_map_set)
                rxe_mr_free_map_set(mr->num_map, mr->cur_map_set);

        if (mr->next_map_set)
                rxe_mr_free_map_set(mr->num_map, mr->next_map_set);
}