// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8 bit key value that is
 * different than the last_key. Set last_key to -1
 * if this is the first key for an MR or MW
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		if (iova < mr->iova || length > mr->length ||
		    iova > mr->iova + mr->length - length)
			return -EFAULT;
		return 0;

	default:
		pr_warn("%s: mr type (%d) not supported\n",
			__func__, mr->type);
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* set ibmr->l/rkey and also copy into private l/rkey
	 * for user MRs these will always be the same
	 * for cases where caller 'owns' the key portion
	 * they may be different until REG_MR WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
}
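
/*
 * Illustrative sketch only (not part of the driver): how a key built by
 * rxe_mr_init() above splits back into its two fields.  The upper bits
 * carry the MR's pool index (recovered with "key >> 8" in lookup_mr()
 * and rxe_invalidate_mr() below); the low 8 bits are the random key
 * byte from rxe_get_next_key().  E.g. pool index 0x123 with key byte
 * 0x45 gives lkey 0x12345.  The function name is hypothetical.
 */
static void __maybe_unused rxe_mr_key_layout_example(u32 lkey)
{
	u32 index = lkey >> 8;		/* pool index used for lookup */
	u8 key_byte = lkey & 0xff;	/* random byte, changed on each (re)use */

	pr_debug("lkey %#x -> index %#x, key byte %#x\n",
		 lkey, index, key_byte);
}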

static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

void rxe_mr_init_dma(struct rxe_pd *pd, int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->ibmr.pd = &pd->ibpd;
	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_pd *pd, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;
	int i;

	umem = ib_umem_get(pd->ibpd.device, start, length, access);
	if (IS_ERR(umem)) {
		pr_warn("%s: Unable to pin memory region err = %d\n",
			__func__, (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		pr_warn("%s: Unable to allocate memory for map\n",
			__func__);
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;

		for_each_sgtable_page (&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				pr_warn("%s: Unable to get virtual address\n",
					__func__);
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->ibmr.pd = &pd->ibpd;
	mr->umem = umem;
	mr->access = access;
	mr->length = length;
	mr->iova = iova;
	mr->va = start;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->type = IB_MR_TYPE_USER;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

int rxe_mr_init_fast(struct rxe_pd *pd, int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->ibmr.pd = &pd->ibpd;
	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->type = IB_MR_TYPE_MEM_REG;

	return 0;

err1:
	return err;
}

static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}
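
/*
 * Illustrative sketch only (not part of the driver): the arithmetic used
 * by lookup_iova() on its fast path, written out for the common case of a
 * page-aligned user MR (mr->page_shift == PAGE_SHIFT).  A byte offset
 * into the MR is split into a map index, a buffer index within that map
 * and a byte offset within that buffer.  The function name and parameter
 * names are hypothetical.
 */
static void __maybe_unused rxe_mr_iova_split_example(u64 iova, u64 mr_iova,
						      size_t umem_offset)
{
	size_t offset = iova - mr_iova + umem_offset;	/* offset into the MR */
	size_t in_page = offset & (PAGE_SIZE - 1);	/* offset within the page */
	size_t page = offset >> PAGE_SHIFT;		/* which page overall */
	int n = page & (RXE_BUF_PER_MAP - 1);		/* buffer within the map */
	int m = page >> ilog2(RXE_BUF_PER_MAP);		/* which map */

	pr_debug("iova %#llx -> map %d, buf %d, offset %zu\n",
		 iova, m, n, in_page);
}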

void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		pr_warn("mr not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		pr_warn("range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		pr_warn("crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * a mr object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;

	if (length == 0)
		return 0;

	if (mr->type == IB_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(
	struct rxe_pd		*pd,
	int			access,
	struct rxe_dma_info	*dma,
	void			*addr,
	int			length,
	enum rxe_mr_copy_dir	dir)
{
	int			bytes;
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;
	struct rxe_mr		*mr	= NULL;
	u64			iova;
	int			err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_put(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset	+= bytes;
			resid	-= bytes;
			length	-= bytes;
			addr	+= bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	if (mr)
		rxe_put(mr);

	return 0;

err2:
	if (mr)
		rxe_put(mr);
err1:
	return err;
}

int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge		*sge	= &dma->sge[dma->cur_sge];
	int			offset	= dma->sge_offset;
	int			resid	= dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset	+= bytes;
		resid	-= bytes;
		length	-= bytes;
	}

	dma->sge_offset = offset;
	dma->resid	= resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || (access && !(access & mr->access)) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_put(mr);
		mr = NULL;
	}

	return mr;
}
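
/*
 * Illustrative sketch only (not part of the driver): the typical pattern
 * for consuming a key with the helpers above, loosely modelled on what
 * copy_data() does for local keys.  Look the MR up (which takes a
 * reference), copy through it, then drop the reference with rxe_put().
 * The function name and its parameters are hypothetical.
 */
static int __maybe_unused rxe_mr_copy_example(struct rxe_pd *pd, u32 rkey,
					      u64 iova, void *data, int len)
{
	struct rxe_mr *mr;
	int err;

	mr = lookup_mr(pd, IB_ACCESS_REMOTE_WRITE, rkey, RXE_LOOKUP_REMOTE);
	if (!mr)
		return -EINVAL;	/* bad key, wrong pd, no access or invalid state */

	/* copy 'len' bytes from 'data' into the MR starting at 'iova' */
	err = rxe_mr_copy(mr, iova, data, len, RXE_TO_MR_OBJ);

	rxe_put(mr);	/* drop the reference taken by lookup_mr() */

	return err;
}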

int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		pr_err("%s: No MR for key %#x\n", __func__, key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		pr_err("%s: wr key (%#x) doesn't match mr key (%#x)\n",
			__func__, key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		pr_warn("%s: Attempt to invalidate an MR while bound to MWs\n",
			__func__);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (unlikely(mr->type != IB_MR_TYPE_MEM_REG)) {
		pr_warn("%s: mr->type (%d) is wrong type\n", __func__, mr->type);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_put(mr);
err:
	return ret;
}

/* user can (re)register fast MR by executing a REG_MR WQE.
 * user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to maintain
 * the ib mr keys in sync with rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		pr_warn("%s: mr->lkey = 0x%x not free\n",
			__func__, mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		pr_warn("%s: qp->pd and mr->pd don't match\n",
			__func__);
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		pr_warn("%s: key = 0x%x has wrong index mr->lkey = 0x%x\n",
			__func__, key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	/* See IBA 10.6.7.2.6 */
	if (atomic_read(&mr->num_mw) > 0)
		return -EINVAL;

	rxe_cleanup(mr);

	return 0;
}

void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
	int i;

	rxe_put(mr_pd(mr));
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}
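
/*
 * Illustrative sketch only (not part of the driver): the key rule that
 * rxe_reg_fast_mr() above enforces.  A REG_MR WQE may change only the
 * low 8 bits (the key byte) of the lkey/rkey; the upper bits carry the
 * pool index and must match the existing mr->lkey.  E.g. with mr->lkey
 * == 0x12345, key 0x123ab is accepted while key 0x124ab is rejected.
 * The function name is hypothetical.
 */
static bool __maybe_unused rxe_reg_mr_key_ok_example(struct rxe_mr *mr, u32 key)
{
	return (mr->lkey & ~0xff) == (key & ~0xff);
}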