// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
/*
 * Copyright (c) 2016 Mellanox Technologies Ltd. All rights reserved.
 * Copyright (c) 2015 System Fabric Works, Inc. All rights reserved.
 */

#include "rxe.h"
#include "rxe_loc.h"

/* Return a random 8-bit key value that is
 * different from the last_key. Set last_key to -1
 * if this is the first key for an MR or MW.
 */
u8 rxe_get_next_key(u32 last_key)
{
	u8 key;

	do {
		get_random_bytes(&key, 1);
	} while (key == last_key);

	return key;
}

int mr_check_range(struct rxe_mr *mr, u64 iova, size_t length)
{
	switch (mr->ibmr.type) {
	case IB_MR_TYPE_DMA:
		return 0;

	case IB_MR_TYPE_USER:
	case IB_MR_TYPE_MEM_REG:
		if (iova < mr->ibmr.iova || length > mr->ibmr.length ||
		    iova > mr->ibmr.iova + mr->ibmr.length - length)
			return -EFAULT;
		return 0;

	default:
		rxe_dbg_mr(mr, "type (%d) not supported\n", mr->ibmr.type);
		return -EFAULT;
	}
}

#define IB_ACCESS_REMOTE	(IB_ACCESS_REMOTE_READ		\
				 | IB_ACCESS_REMOTE_WRITE	\
				 | IB_ACCESS_REMOTE_ATOMIC)

static void rxe_mr_init(int access, struct rxe_mr *mr)
{
	u32 lkey = mr->elem.index << 8 | rxe_get_next_key(-1);
	u32 rkey = (access & IB_ACCESS_REMOTE) ? lkey : 0;

	/* set ibmr->l/rkey and also copy into private l/rkey.
	 * For user MRs these will always be the same; for cases
	 * where the caller 'owns' the key portion they may be
	 * different until the REG_MR WQE is executed.
	 */
	mr->lkey = mr->ibmr.lkey = lkey;
	mr->rkey = mr->ibmr.rkey = rkey;

	mr->state = RXE_MR_STATE_INVALID;
}

static int rxe_mr_alloc(struct rxe_mr *mr, int num_buf)
{
	int i;
	int num_map;
	struct rxe_map **map = mr->map;

	num_map = (num_buf + RXE_BUF_PER_MAP - 1) / RXE_BUF_PER_MAP;

	mr->map = kmalloc_array(num_map, sizeof(*map), GFP_KERNEL);
	if (!mr->map)
		goto err1;

	for (i = 0; i < num_map; i++) {
		mr->map[i] = kmalloc(sizeof(**map), GFP_KERNEL);
		if (!mr->map[i])
			goto err2;
	}

	BUILD_BUG_ON(!is_power_of_2(RXE_BUF_PER_MAP));

	mr->map_shift = ilog2(RXE_BUF_PER_MAP);
	mr->map_mask = RXE_BUF_PER_MAP - 1;

	mr->num_buf = num_buf;
	mr->num_map = num_map;
	mr->max_buf = num_map * RXE_BUF_PER_MAP;

	return 0;

err2:
	for (i--; i >= 0; i--)
		kfree(mr->map[i]);

	kfree(mr->map);
err1:
	return -ENOMEM;
}

void rxe_mr_init_dma(int access, struct rxe_mr *mr)
{
	rxe_mr_init(access, mr);

	mr->access = access;
	mr->state = RXE_MR_STATE_VALID;
	mr->ibmr.type = IB_MR_TYPE_DMA;
}

int rxe_mr_init_user(struct rxe_dev *rxe, u64 start, u64 length, u64 iova,
		     int access, struct rxe_mr *mr)
{
	struct rxe_map **map;
	struct rxe_phys_buf *buf = NULL;
	struct ib_umem *umem;
	struct sg_page_iter sg_iter;
	int num_buf;
	void *vaddr;
	int err;
	int i;

	umem = ib_umem_get(&rxe->ib_dev, start, length, access);
	if (IS_ERR(umem)) {
		rxe_dbg_mr(mr, "Unable to pin memory region err = %d\n",
			   (int)PTR_ERR(umem));
		err = PTR_ERR(umem);
		goto err_out;
	}

	num_buf = ib_umem_num_pages(umem);

	rxe_mr_init(access, mr);

	err = rxe_mr_alloc(mr, num_buf);
	if (err) {
		rxe_dbg_mr(mr, "Unable to allocate memory for map\n");
		goto err_release_umem;
	}

	mr->page_shift = PAGE_SHIFT;
	mr->page_mask = PAGE_SIZE - 1;

	num_buf = 0;
	map = mr->map;
	if (length > 0) {
		buf = map[0]->buf;
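
		/* walk the pinned umem one page at a time and record
		 * each page's kernel virtual address and size in the
		 * two-level map table
		 */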
		for_each_sgtable_page(&umem->sgt_append.sgt, &sg_iter, 0) {
			if (num_buf >= RXE_BUF_PER_MAP) {
				map++;
				buf = map[0]->buf;
				num_buf = 0;
			}

			vaddr = page_address(sg_page_iter_page(&sg_iter));
			if (!vaddr) {
				rxe_dbg_mr(mr, "Unable to get virtual address\n");
				err = -ENOMEM;
				goto err_cleanup_map;
			}

			buf->addr = (uintptr_t)vaddr;
			buf->size = PAGE_SIZE;
			num_buf++;
			buf++;
		}
	}

	mr->umem = umem;
	mr->access = access;
	mr->offset = ib_umem_offset(umem);
	mr->state = RXE_MR_STATE_VALID;
	mr->ibmr.type = IB_MR_TYPE_USER;

	return 0;

err_cleanup_map:
	for (i = 0; i < mr->num_map; i++)
		kfree(mr->map[i]);
	kfree(mr->map);
err_release_umem:
	ib_umem_release(umem);
err_out:
	return err;
}

int rxe_mr_init_fast(int max_pages, struct rxe_mr *mr)
{
	int err;

	/* always allow remote access for FMRs */
	rxe_mr_init(IB_ACCESS_REMOTE, mr);

	err = rxe_mr_alloc(mr, max_pages);
	if (err)
		goto err1;

	mr->max_buf = max_pages;
	mr->state = RXE_MR_STATE_FREE;
	mr->ibmr.type = IB_MR_TYPE_MEM_REG;

	return 0;

err1:
	return err;
}
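
/* translate an iova in the mr into a map index, buf index and byte offset.
 * if page_shift is set (all bufs are PAGE_SIZE) this is simple shift and
 * mask arithmetic, otherwise walk the bufs subtracting each buf's size.
 */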
static void lookup_iova(struct rxe_mr *mr, u64 iova, int *m_out, int *n_out,
			size_t *offset_out)
{
	size_t offset = iova - mr->ibmr.iova + mr->offset;
	int map_index;
	int buf_index;
	u64 length;

	if (likely(mr->page_shift)) {
		*offset_out = offset & mr->page_mask;
		offset >>= mr->page_shift;
		*n_out = offset & mr->map_mask;
		*m_out = offset >> mr->map_shift;
	} else {
		map_index = 0;
		buf_index = 0;

		length = mr->map[map_index]->buf[buf_index].size;

		while (offset >= length) {
			offset -= length;
			buf_index++;

			if (buf_index == RXE_BUF_PER_MAP) {
				map_index++;
				buf_index = 0;
			}
			length = mr->map[map_index]->buf[buf_index].size;
		}

		*m_out = map_index;
		*n_out = buf_index;
		*offset_out = offset;
	}
}

void *iova_to_vaddr(struct rxe_mr *mr, u64 iova, int length)
{
	size_t offset;
	int m, n;
	void *addr;

	if (mr->state != RXE_MR_STATE_VALID) {
		rxe_dbg_mr(mr, "Not in valid state\n");
		addr = NULL;
		goto out;
	}

	if (!mr->map) {
		addr = (void *)(uintptr_t)iova;
		goto out;
	}

	if (mr_check_range(mr, iova, length)) {
		rxe_dbg_mr(mr, "Range violation\n");
		addr = NULL;
		goto out;
	}

	lookup_iova(mr, iova, &m, &n, &offset);

	if (offset + length > mr->map[m]->buf[n].size) {
		rxe_dbg_mr(mr, "Crosses page boundary\n");
		addr = NULL;
		goto out;
	}

	addr = (void *)(uintptr_t)mr->map[m]->buf[n].addr + offset;

out:
	return addr;
}

/* copy data from a range (vaddr, vaddr+length-1) to or from
 * an MR object starting at iova.
 */
int rxe_mr_copy(struct rxe_mr *mr, u64 iova, void *addr, int length,
		enum rxe_mr_copy_dir dir)
{
	int err;
	int bytes;
	u8 *va;
	struct rxe_map **map;
	struct rxe_phys_buf *buf;
	int m;
	int i;
	size_t offset;

	if (length == 0)
		return 0;

	if (mr->ibmr.type == IB_MR_TYPE_DMA) {
		u8 *src, *dest;

		src = (dir == RXE_TO_MR_OBJ) ? addr : ((void *)(uintptr_t)iova);

		dest = (dir == RXE_TO_MR_OBJ) ? ((void *)(uintptr_t)iova) : addr;

		memcpy(dest, src, length);

		return 0;
	}

	WARN_ON_ONCE(!mr->map);

	err = mr_check_range(mr, iova, length);
	if (err) {
		err = -EFAULT;
		goto err1;
	}

	lookup_iova(mr, iova, &m, &i, &offset);

	map = mr->map + m;
	buf = map[0]->buf + i;

	while (length > 0) {
		u8 *src, *dest;

		va = (u8 *)(uintptr_t)buf->addr + offset;
		src = (dir == RXE_TO_MR_OBJ) ? addr : va;
		dest = (dir == RXE_TO_MR_OBJ) ? va : addr;

		bytes = buf->size - offset;

		if (bytes > length)
			bytes = length;

		memcpy(dest, src, bytes);

		length -= bytes;
		addr += bytes;

		offset = 0;
		buf++;
		i++;

		if (i == RXE_BUF_PER_MAP) {
			i = 0;
			map++;
			buf = map[0]->buf;
		}
	}

	return 0;

err1:
	return err;
}

/* copy data in or out of a wqe, i.e. sg list
 * under the control of a dma descriptor
 */
int copy_data(struct rxe_pd *pd, int access, struct rxe_dma_info *dma,
	      void *addr, int length, enum rxe_mr_copy_dir dir)
{
	int bytes;
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;
	struct rxe_mr *mr = NULL;
	u64 iova;
	int err;

	if (length == 0)
		return 0;

	if (length > resid) {
		err = -EINVAL;
		goto err2;
	}

	if (sge->length && (offset < sge->length)) {
		mr = lookup_mr(pd, access, sge->lkey, RXE_LOOKUP_LOCAL);
		if (!mr) {
			err = -EINVAL;
			goto err1;
		}
	}

	while (length > 0) {
		bytes = length;

		if (offset >= sge->length) {
			if (mr) {
				rxe_put(mr);
				mr = NULL;
			}
			sge++;
			dma->cur_sge++;
			offset = 0;

			if (dma->cur_sge >= dma->num_sge) {
				err = -ENOSPC;
				goto err2;
			}

			if (sge->length) {
				mr = lookup_mr(pd, access, sge->lkey,
					       RXE_LOOKUP_LOCAL);
				if (!mr) {
					err = -EINVAL;
					goto err1;
				}
			} else {
				continue;
			}
		}

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		if (bytes > 0) {
			iova = sge->addr + offset;

			err = rxe_mr_copy(mr, iova, addr, bytes, dir);
			if (err)
				goto err2;

			offset += bytes;
			resid -= bytes;
			length -= bytes;
			addr += bytes;
		}
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	if (mr)
		rxe_put(mr);

	return 0;

err2:
	if (mr)
		rxe_put(mr);
err1:
	return err;
}
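
/* advance the sg list of a dma descriptor by length bytes,
 * updating cur_sge, sge_offset and resid, without copying any data
 */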
int advance_dma_data(struct rxe_dma_info *dma, unsigned int length)
{
	struct rxe_sge *sge = &dma->sge[dma->cur_sge];
	int offset = dma->sge_offset;
	int resid = dma->resid;

	while (length) {
		unsigned int bytes;

		if (offset >= sge->length) {
			sge++;
			dma->cur_sge++;
			offset = 0;
			if (dma->cur_sge >= dma->num_sge)
				return -ENOSPC;
		}

		bytes = length;

		if (bytes > sge->length - offset)
			bytes = sge->length - offset;

		offset += bytes;
		resid -= bytes;
		length -= bytes;
	}

	dma->sge_offset = offset;
	dma->resid = resid;

	return 0;
}

/* (1) find the mr corresponding to lkey/rkey
 *     depending on lookup_type
 * (2) verify that the (qp) pd matches the mr pd
 * (3) verify that the mr can support the requested access
 * (4) verify that mr state is valid
 */
struct rxe_mr *lookup_mr(struct rxe_pd *pd, int access, u32 key,
			 enum rxe_mr_lookup_type type)
{
	struct rxe_mr *mr;
	struct rxe_dev *rxe = to_rdev(pd->ibpd.device);
	int index = key >> 8;

	mr = rxe_pool_get_index(&rxe->mr_pool, index);
	if (!mr)
		return NULL;

	if (unlikely((type == RXE_LOOKUP_LOCAL && mr->lkey != key) ||
		     (type == RXE_LOOKUP_REMOTE && mr->rkey != key) ||
		     mr_pd(mr) != pd || ((access & mr->access) != access) ||
		     mr->state != RXE_MR_STATE_VALID)) {
		rxe_put(mr);
		mr = NULL;
	}

	return mr;
}

int rxe_invalidate_mr(struct rxe_qp *qp, u32 key)
{
	struct rxe_dev *rxe = to_rdev(qp->ibqp.device);
	struct rxe_mr *mr;
	int ret;

	mr = rxe_pool_get_index(&rxe->mr_pool, key >> 8);
	if (!mr) {
		rxe_dbg_qp(qp, "No MR for key %#x\n", key);
		ret = -EINVAL;
		goto err;
	}

	if (mr->rkey ? (key != mr->rkey) : (key != mr->lkey)) {
		rxe_dbg_mr(mr, "wr key (%#x) doesn't match mr key (%#x)\n",
			   key, (mr->rkey ? mr->rkey : mr->lkey));
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (atomic_read(&mr->num_mw) > 0) {
		rxe_dbg_mr(mr, "Attempt to invalidate an MR while bound to MWs\n");
		ret = -EINVAL;
		goto err_drop_ref;
	}

	if (unlikely(mr->ibmr.type != IB_MR_TYPE_MEM_REG)) {
		rxe_dbg_mr(mr, "Type (%d) is wrong\n", mr->ibmr.type);
		ret = -EINVAL;
		goto err_drop_ref;
	}

	mr->state = RXE_MR_STATE_FREE;
	ret = 0;

err_drop_ref:
	rxe_put(mr);
err:
	return ret;
}

/* user can (re)register a fast MR by executing a REG_MR WQE.
 * user is expected to hold a reference on the ib mr until the
 * WQE completes.
 * Once a fast MR is created this is the only way to change the
 * private keys. It is the responsibility of the user to keep
 * the ib mr keys in sync with the rxe mr keys.
 */
int rxe_reg_fast_mr(struct rxe_qp *qp, struct rxe_send_wqe *wqe)
{
	struct rxe_mr *mr = to_rmr(wqe->wr.wr.reg.mr);
	u32 key = wqe->wr.wr.reg.key;
	u32 access = wqe->wr.wr.reg.access;

	/* user can only register MR in free state */
	if (unlikely(mr->state != RXE_MR_STATE_FREE)) {
		rxe_dbg_mr(mr, "mr->lkey = 0x%x not free\n", mr->lkey);
		return -EINVAL;
	}

	/* user can only register mr with qp in same protection domain */
	if (unlikely(qp->ibqp.pd != mr->ibmr.pd)) {
		rxe_dbg_mr(mr, "qp->pd and mr->pd don't match\n");
		return -EINVAL;
	}

	/* user is only allowed to change key portion of l/rkey */
	if (unlikely((mr->lkey & ~0xff) != (key & ~0xff))) {
		rxe_dbg_mr(mr, "key = 0x%x has wrong index mr->lkey = 0x%x\n",
			   key, mr->lkey);
		return -EINVAL;
	}

	mr->access = access;
	mr->lkey = key;
	mr->rkey = (access & IB_ACCESS_REMOTE) ? key : 0;
	mr->ibmr.iova = wqe->wr.wr.reg.mr->iova;
	mr->state = RXE_MR_STATE_VALID;

	return 0;
}

int rxe_dereg_mr(struct ib_mr *ibmr, struct ib_udata *udata)
{
	struct rxe_mr *mr = to_rmr(ibmr);

	/* See IBA 10.6.7.2.6 */
	if (atomic_read(&mr->num_mw) > 0)
		return -EINVAL;

	rxe_cleanup(mr);

	return 0;
}

void rxe_mr_cleanup(struct rxe_pool_elem *elem)
{
	struct rxe_mr *mr = container_of(elem, typeof(*mr), elem);
	int i;

	rxe_put(mr_pd(mr));
	ib_umem_release(mr->umem);

	if (mr->map) {
		for (i = 0; i < mr->num_map; i++)
			kfree(mr->map[i]);

		kfree(mr->map);
	}
}